Merge branch 'main' of github.com:cmdr2/InvokeAI

Author: cmdr2, 2022-10-29 18:42:18 +05:30
commit 94cf660848
99 changed files with 4012 additions and 1577 deletions


@ -1,26 +1,43 @@
name: Create Caches
on:
workflow_dispatch
on: workflow_dispatch
jobs:
build:
os_matrix:
strategy:
matrix:
os: [ ubuntu-latest, macos-12 ]
name: Create Caches on ${{ matrix.os }} conda
os: [ubuntu-latest, macos-latest]
include:
- os: ubuntu-latest
environment-file: environment.yml
default-shell: bash -l {0}
- os: macos-latest
environment-file: environment-mac.yml
default-shell: bash -l {0}
name: Test invoke.py on ${{ matrix.os }} with conda
runs-on: ${{ matrix.os }}
defaults:
run:
shell: ${{ matrix.default-shell }}
steps:
- name: Set platform variables
id: vars
run: |
if [ "$RUNNER_OS" = "macOS" ]; then
echo "::set-output name=ENV_FILE::environment-mac.yml"
echo "::set-output name=PYTHON_BIN::/usr/local/miniconda/envs/ldm/bin/python"
elif [ "$RUNNER_OS" = "Linux" ]; then
echo "::set-output name=ENV_FILE::environment.yml"
echo "::set-output name=PYTHON_BIN::/usr/share/miniconda/envs/ldm/bin/python"
fi
- name: Checkout sources
uses: actions/checkout@v3
- name: setup miniconda
uses: conda-incubator/setup-miniconda@v2
with:
auto-activate-base: false
auto-update-conda: false
miniconda-version: latest
- name: set environment
run: |
[[ "$GITHUB_REF" == 'refs/heads/main' ]] \
&& echo "TEST_PROMPTS=tests/preflight_prompts.txt" >> $GITHUB_ENV \
|| echo "TEST_PROMPTS=tests/dev_prompts.txt" >> $GITHUB_ENV
echo "CONDA_ROOT=$CONDA" >> $GITHUB_ENV
echo "CONDA_ENV_NAME=invokeai" >> $GITHUB_ENV
- name: Use Cached Stable Diffusion v1.4 Model
id: cache-sd-v1-4
uses: actions/cache@v3
@ -29,42 +46,52 @@ jobs:
with:
path: models/ldm/stable-diffusion-v1/model.ckpt
key: ${{ env.cache-name }}
restore-keys: |
${{ env.cache-name }}
restore-keys: ${{ env.cache-name }}
- name: Download Stable Diffusion v1.4 Model
if: ${{ steps.cache-sd-v1-4.outputs.cache-hit != 'true' }}
run: |
if [ ! -e models/ldm/stable-diffusion-v1 ]; then
mkdir -p models/ldm/stable-diffusion-v1
fi
if [ ! -e models/ldm/stable-diffusion-v1/model.ckpt ]; then
curl -o models/ldm/stable-diffusion-v1/model.ckpt ${{ secrets.SD_V1_4_URL }}
fi
- name: Use Cached Dependencies
id: cache-conda-env-ldm
[[ -d models/ldm/stable-diffusion-v1 ]] \
|| mkdir -p models/ldm/stable-diffusion-v1
[[ -r models/ldm/stable-diffusion-v1/model.ckpt ]] \
|| curl -o models/ldm/stable-diffusion-v1/model.ckpt ${{ secrets.SD_V1_4_URL }}
- name: Use cached Conda Environment
uses: actions/cache@v3
env:
cache-name: cache-conda-env-ldm
cache-name: cache-conda-env-${{ env.CONDA_ENV_NAME }}
conda-env-file: ${{ matrix.environment-file }}
with:
path: ~/.conda/envs/ldm
path: ${{ env.CONDA_ROOT }}/envs/${{ env.CONDA_ENV_NAME }}
key: ${{ env.cache-name }}
restore-keys: |
${{ env.cache-name }}-${{ runner.os }}-${{ hashFiles(steps.vars.outputs.ENV_FILE) }}
- name: Install Dependencies
if: ${{ steps.cache-conda-env-ldm.outputs.cache-hit != 'true' }}
run: |
conda env create -f ${{ steps.vars.outputs.ENV_FILE }}
- name: Use Cached Huggingface and Torch models
id: cache-huggingface-torch
restore-keys: ${{ env.cache-name }}-${{ runner.os }}-${{ hashFiles(env.conda-env-file) }}
- name: Use cached Conda Packages
uses: actions/cache@v3
env:
cache-name: cache-huggingface-torch
cache-name: cache-conda-env-${{ env.CONDA_ENV_NAME }}
conda-env-file: ${{ matrix.environment-file }}
with:
path: ${{ env.CONDA_PKGS_DIR }}
key: ${{ env.cache-name }}
restore-keys: ${{ env.cache-name }}-${{ runner.os }}-${{ hashFiles(env.conda-env-file) }}
- name: Activate Conda Env
uses: conda-incubator/setup-miniconda@v2
with:
activate-environment: ${{ env.CONDA_ENV_NAME }}
environment-file: ${{ matrix.environment-file }}
- name: Use Cached Huggingface and Torch models
id: cache-hugginface-torch
uses: actions/cache@v3
env:
cache-name: cache-hugginface-torch
with:
path: ~/.cache
key: ${{ env.cache-name }}
restore-keys: |
${{ env.cache-name }}-${{ hashFiles('scripts/preload_models.py') }}
- name: Download Huggingface and Torch models
if: ${{ steps.cache-huggingface-torch.outputs.cache-hit != 'true' }}
run: |
${{ steps.vars.outputs.PYTHON_BIN }} scripts/preload_models.py
- name: run preload_models.py
run: python scripts/preload_models.py

.github/workflows/mkdocs-material.yml (new file, +40 lines)

@ -0,0 +1,40 @@
name: mkdocs-material
on:
push:
branches:
- 'main'
- 'development'
jobs:
mkdocs-material:
runs-on: ubuntu-latest
steps:
- name: checkout sources
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: setup python
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: install requirements
run: |
python -m \
pip install -r requirements-mkdocs.txt
- name: confirm buildability
run: |
python -m \
mkdocs build \
--clean \
--verbose
- name: deploy to gh-pages
if: ${{ github.ref == 'refs/heads/main' }}
run: |
python -m \
mkdocs gh-deploy \
--clean \
--force


@ -4,29 +4,55 @@ on:
branches:
- 'main'
- 'development'
- 'fix-gh-actions-fork'
pull_request:
branches:
- 'main'
- 'development'
jobs:
os_matrix:
strategy:
matrix:
os: [ ubuntu-latest, macos-12 ]
os: [ubuntu-latest, macos-latest]
include:
- os: ubuntu-latest
environment-file: environment.yml
default-shell: bash -l {0}
- os: macos-latest
environment-file: environment-mac.yml
default-shell: bash -l {0}
name: Test invoke.py on ${{ matrix.os }} with conda
runs-on: ${{ matrix.os }}
defaults:
run:
shell: ${{ matrix.default-shell }}
steps:
- run: |
echo The PR was merged
- name: Set platform variables
id: vars
run: |
# Note, can't "activate" via github action; specifying the env's python has the same effect
if [ "$RUNNER_OS" = "macOS" ]; then
echo "::set-output name=ENV_FILE::environment-mac.yml"
echo "::set-output name=PYTHON_BIN::/usr/local/miniconda/envs/ldm/bin/python"
elif [ "$RUNNER_OS" = "Linux" ]; then
echo "::set-output name=ENV_FILE::environment.yml"
echo "::set-output name=PYTHON_BIN::/usr/share/miniconda/envs/ldm/bin/python"
fi
- name: Checkout sources
uses: actions/checkout@v3
- name: setup miniconda
uses: conda-incubator/setup-miniconda@v2
with:
auto-activate-base: false
auto-update-conda: false
miniconda-version: latest
- name: set test prompt to main branch validation
if: ${{ github.ref == 'refs/heads/main' }}
run: echo "TEST_PROMPTS=tests/preflight_prompts.txt" >> $GITHUB_ENV
- name: set test prompt to development branch validation
if: ${{ github.ref == 'refs/heads/development' }}
run: echo "TEST_PROMPTS=tests/dev_prompts.txt" >> $GITHUB_ENV
- name: set test prompt to Pull Request validation
if: ${{ github.ref != 'refs/heads/main' && github.ref != 'refs/heads/development' }}
run: echo "TEST_PROMPTS=tests/pr_prompt.txt" >> $GITHUB_ENV
- name: set conda environment name
run: echo "CONDA_ENV_NAME=invokeai" >> $GITHUB_ENV
- name: Use Cached Stable Diffusion v1.4 Model
id: cache-sd-v1-4
uses: actions/cache@v3
@ -35,31 +61,40 @@ jobs:
with:
path: models/ldm/stable-diffusion-v1/model.ckpt
key: ${{ env.cache-name }}
restore-keys: |
${{ env.cache-name }}
restore-keys: ${{ env.cache-name }}
- name: Download Stable Diffusion v1.4 Model
if: ${{ steps.cache-sd-v1-4.outputs.cache-hit != 'true' }}
run: |
if [ ! -e models/ldm/stable-diffusion-v1 ]; then
mkdir -p models/ldm/stable-diffusion-v1
fi
if [ ! -e models/ldm/stable-diffusion-v1/model.ckpt ]; then
curl -o models/ldm/stable-diffusion-v1/model.ckpt ${{ secrets.SD_V1_4_URL }}
fi
- name: Use Cached Dependencies
id: cache-conda-env-ldm
[[ -d models/ldm/stable-diffusion-v1 ]] \
|| mkdir -p models/ldm/stable-diffusion-v1
[[ -r models/ldm/stable-diffusion-v1/model.ckpt ]] \
|| curl -o models/ldm/stable-diffusion-v1/model.ckpt ${{ secrets.SD_V1_4_URL }}
- name: Use cached Conda Environment
uses: actions/cache@v3
env:
cache-name: cache-conda-env-ldm
cache-name: cache-conda-env-${{ env.CONDA_ENV_NAME }}
conda-env-file: ${{ matrix.environment-file }}
with:
path: ~/.conda/envs/ldm
key: ${{ env.cache-name }}
restore-keys: |
${{ env.cache-name }}-${{ runner.os }}-${{ hashFiles(steps.vars.outputs.ENV_FILE) }}
- name: Install Dependencies
if: ${{ steps.cache-conda-env-ldm.outputs.cache-hit != 'true' }}
run: |
conda env create -f ${{ steps.vars.outputs.ENV_FILE }}
path: ${{ env.CONDA }}/envs/${{ env.CONDA_ENV_NAME }}
key: env-${{ env.cache-name }}-${{ runner.os }}-${{ hashFiles(env.conda-env-file) }}
- name: Use cached Conda Packages
uses: actions/cache@v3
env:
cache-name: cache-conda-pkgs-${{ env.CONDA_ENV_NAME }}
conda-env-file: ${{ matrix.environment-file }}
with:
path: ${{ env.CONDA_PKGS_DIR }}
key: pkgs-${{ env.cache-name }}-${{ runner.os }}-${{ hashFiles(env.conda-env-file) }}
- name: Activate Conda Env
uses: conda-incubator/setup-miniconda@v2
with:
activate-environment: ${{ env.CONDA_ENV_NAME }}
environment-file: ${{ matrix.environment-file }}
- name: Use Cached Huggingface and Torch models
id: cache-hugginface-torch
uses: actions/cache@v3
@ -70,28 +105,22 @@ jobs:
key: ${{ env.cache-name }}
restore-keys: |
${{ env.cache-name }}-${{ hashFiles('scripts/preload_models.py') }}
- name: Download Huggingface and Torch models
if: ${{ steps.cache-hugginface-torch.outputs.cache-hit != 'true' }}
run: |
${{ steps.vars.outputs.PYTHON_BIN }} scripts/preload_models.py
# - name: Run tmate
# uses: mxschmitt/action-tmate@v3
# timeout-minutes: 30
- name: run preload_models.py
run: python scripts/preload_models.py
- name: Run the tests
run: |
# Note, can't "activate" via github action; specifying the env's python has the same effect
if [ $(uname) = "Darwin" ]; then
export PYTORCH_ENABLE_MPS_FALLBACK=1
fi
# Utterly hacky, but I don't know how else to do this
if [[ ${{ github.ref }} == 'refs/heads/master' ]]; then
time ${{ steps.vars.outputs.PYTHON_BIN }} scripts/invoke.py --from_file tests/preflight_prompts.txt
elif [[ ${{ github.ref }} == 'refs/heads/development' ]]; then
time ${{ steps.vars.outputs.PYTHON_BIN }} scripts/invoke.py --from_file tests/dev_prompts.txt
fi
time python scripts/invoke.py \
--from_file ${{ env.TEST_PROMPTS }}
- name: export conda env
run: |
mkdir -p outputs/img-samples
conda env export --name ${{ env.CONDA_ENV_NAME }} > outputs/img-samples/environment-${{ runner.os }}.yml
- name: Archive results
uses: actions/upload-artifact@v3
with:
name: results
name: results_${{ matrix.os }}
path: outputs/img-samples

.gitignore (2 changed lines)

@ -1,7 +1,7 @@
# ignore default image save location and model symbolic link
outputs/
models/ldm/stable-diffusion-v1/model.ckpt
ldm/dream/restoration/codeformer/weights
ldm/invoke/restoration/codeformer/weights
# ignore the Anaconda/Miniconda installer used while building Docker image
anaconda.sh


@ -1,6 +1,6 @@
MIT License
Copyright (c) 2022 Lincoln D. Stein (https://github.com/lstein)
Copyright (c) 2022 Lincoln Stein and InvokeAI Organization
This software is derived from a fork of the source code available from
https://github.com/pesser/stable-diffusion and


@ -2,7 +2,7 @@
# InvokeAI: A Stable Diffusion Toolkit
_Formally known as lstein/stable-diffusion_
_Formerly known as lstein/stable-diffusion_
![project logo](docs/assets/logo.png)
@ -134,6 +134,11 @@ you can try starting `invoke.py` with the `--precision=float32` flag:
### Latest Changes
- v2.0.1 (13 October 2022)
- fix noisy images at high step count when using k* samplers
- dream.py script now calls invoke.py module directly rather than
via a new python process (which could break the environment)
- v2.0.0 (9 October 2022)
- `dream.py` script renamed `invoke.py`. A `dream.py` script wrapper remains

BIN: assets/caution.png (new binary file, 33 KiB)

@ -257,14 +257,14 @@ class InvokeAIWebServer:
@socketio.on('generateImage')
def handle_generate_image_event(
generation_parameters, esrgan_parameters, gfpgan_parameters
generation_parameters, esrgan_parameters, facetool_parameters
):
try:
print(
f'>> Image generation requested: {generation_parameters}\nESRGAN parameters: {esrgan_parameters}\nGFPGAN parameters: {gfpgan_parameters}'
f'>> Image generation requested: {generation_parameters}\nESRGAN parameters: {esrgan_parameters}\nFacetool parameters: {facetool_parameters}'
)
self.generate_images(
generation_parameters, esrgan_parameters, gfpgan_parameters
generation_parameters, esrgan_parameters, facetool_parameters
)
except Exception as e:
self.socketio.emit('error', {'message': (str(e))})
@ -300,9 +300,11 @@ class InvokeAIWebServer:
)
if postprocessing_parameters['type'] == 'esrgan':
progress.set_current_status('Upscaling')
progress.set_current_status('Upscaling (ESRGAN)')
elif postprocessing_parameters['type'] == 'gfpgan':
progress.set_current_status('Restoring Faces')
progress.set_current_status('Restoring Faces (GFPGAN)')
elif postprocessing_parameters['type'] == 'codeformer':
progress.set_current_status('Restoring Faces (Codeformer)')
socketio.emit('progressUpdate', progress.to_formatted_dict())
eventlet.sleep(0)
@ -319,9 +321,17 @@ class InvokeAIWebServer:
elif postprocessing_parameters['type'] == 'gfpgan':
image = self.gfpgan.process(
image=image,
strength=postprocessing_parameters['gfpgan_strength'],
strength=postprocessing_parameters['facetool_strength'],
seed=seed,
)
elif postprocessing_parameters['type'] == 'codeformer':
image = self.codeformer.process(
image=image,
strength=postprocessing_parameters['facetool_strength'],
fidelity=postprocessing_parameters['codeformer_fidelity'],
seed=seed,
device='cpu' if str(self.generate.device) == 'mps' else self.generate.device
)
else:
raise TypeError(
f'{postprocessing_parameters["type"]} is not a valid postprocessing type'
@ -448,7 +458,7 @@ class InvokeAIWebServer:
}
def generate_images(
self, generation_parameters, esrgan_parameters, gfpgan_parameters
self, generation_parameters, esrgan_parameters, facetool_parameters
):
try:
self.canceled.clear()
@ -551,7 +561,7 @@ class InvokeAIWebServer:
nonlocal generation_parameters
nonlocal esrgan_parameters
nonlocal gfpgan_parameters
nonlocal facetool_parameters
nonlocal progress
step_index = 1
@ -611,23 +621,41 @@ class InvokeAIWebServer:
if self.canceled.is_set():
raise CanceledException
if gfpgan_parameters:
progress.set_current_status('Restoring Faces')
if facetool_parameters:
if facetool_parameters['type'] == 'gfpgan':
progress.set_current_status('Restoring Faces (GFPGAN)')
elif facetool_parameters['type'] == 'codeformer':
progress.set_current_status('Restoring Faces (Codeformer)')
progress.set_current_status_has_steps(False)
self.socketio.emit(
'progressUpdate', progress.to_formatted_dict()
)
eventlet.sleep(0)
image = self.gfpgan.process(
image=image,
strength=gfpgan_parameters['strength'],
seed=seed,
)
if facetool_parameters['type'] == 'gfpgan':
image = self.gfpgan.process(
image=image,
strength=facetool_parameters['strength'],
seed=seed,
)
elif facetool_parameters['type'] == 'codeformer':
image = self.codeformer.process(
image=image,
strength=facetool_parameters['strength'],
fidelity=facetool_parameters['codeformer_fidelity'],
seed=seed,
device='cpu' if str(self.generate.device) == 'mps' else self.generate.device,
)
all_parameters['codeformer_fidelity'] = facetool_parameters['codeformer_fidelity']
postprocessing = True
all_parameters['gfpgan_strength'] = gfpgan_parameters[
all_parameters['facetool_strength'] = facetool_parameters[
'strength'
]
all_parameters['facetool_type'] = facetool_parameters[
'type'
]
progress.set_current_status('Saving Image')
self.socketio.emit(
@ -723,6 +751,7 @@ class InvokeAIWebServer:
'height',
'extra',
'seamless',
'hires_fix',
]
rfc_dict = {}
@ -735,14 +764,16 @@ class InvokeAIWebServer:
postprocessing = []
# 'postprocessing' is either null or an
if 'gfpgan_strength' in parameters:
postprocessing.append(
{
'type': 'gfpgan',
'strength': float(parameters['gfpgan_strength']),
if 'facetool_strength' in parameters:
facetool_parameters = {
'type': str(parameters['facetool_type']),
'strength': float(parameters['facetool_strength']),
}
)
if parameters['facetool_type'] == 'codeformer':
facetool_parameters['fidelity'] = float(parameters['codeformer_fidelity'])
postprocessing.append(facetool_parameters)
if 'upscale' in parameters:
postprocessing.append(
@ -761,7 +792,7 @@ class InvokeAIWebServer:
rfc_dict['sampler'] = parameters['sampler_name']
# display weighted subprompts (liable to change)
subprompts = split_weighted_subprompts(parameters['prompt'])
subprompts = split_weighted_subprompts(parameters['prompt'], skip_normalize=True)
subprompts = [{'prompt': x[0], 'weight': x[1]} for x in subprompts]
rfc_dict['prompt'] = subprompts
@ -837,8 +868,15 @@ class InvokeAIWebServer:
elif parameters['type'] == 'gfpgan':
postprocessing_metadata['type'] = 'gfpgan'
postprocessing_metadata['strength'] = parameters[
'gfpgan_strength'
'facetool_strength'
]
elif parameters['type'] == 'codeformer':
postprocessing_metadata['type'] = 'codeformer'
postprocessing_metadata['strength'] = parameters[
'facetool_strength'
]
postprocessing_metadata['fidelity'] = parameters['codeformer_fidelity']
else:
raise TypeError(f"Invalid type: {parameters['type']}")


@ -36,6 +36,8 @@ def parameters_to_command(params):
switches.append(f'-A {params["sampler_name"]}')
if "seamless" in params and params["seamless"] == True:
switches.append(f"--seamless")
if "hires_fix" in params and params["hires_fix"] == True:
switches.append(f"--hires")
if "init_img" in params and len(params["init_img"]) > 0:
switches.append(f'-I {params["init_img"]}')
if "init_mask" in params and len(params["init_mask"]) > 0:
@ -46,8 +48,14 @@ def parameters_to_command(params):
switches.append(f'-f {params["strength"]}')
if "fit" in params and params["fit"] == True:
switches.append(f"--fit")
if "gfpgan_strength" in params and params["gfpgan_strength"]:
if "facetool" in params:
switches.append(f'-ft {params["facetool"]}')
if "facetool_strength" in params and params["facetool_strength"]:
switches.append(f'-G {params["facetool_strength"]}')
elif "gfpgan_strength" in params and params["gfpgan_strength"]:
switches.append(f'-G {params["gfpgan_strength"]}')
if "codeformer_fidelity" in params:
switches.append(f'-cf {params["codeformer_fidelity"]}')
if "upscale" in params and params["upscale"]:
switches.append(f'-U {params["upscale"][0]} {params["upscale"][1]}')
if "variation_amount" in params and params["variation_amount"] > 0:
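For illustration, with hypothetical parameters facetool="codeformer", facetool_strength=0.8 and codeformer_fidelity=0.75, the mapping above would contribute switches along these lines:

```bash
# illustrative fragment of the generated command line
-ft codeformer -G 0.8 -cf 0.75
```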


@ -349,7 +349,7 @@ def handle_run_gfpgan_event(original_image, gfpgan_parameters):
eventlet.sleep(0)
image = gfpgan.process(
image=image, strength=gfpgan_parameters["gfpgan_strength"], seed=seed
image=image, strength=gfpgan_parameters["facetool_strength"], seed=seed
)
progress["currentStatus"] = "Saving image"
@ -464,7 +464,7 @@ def parameters_to_post_processed_image_metadata(parameters, original_image_path,
image["strength"] = parameters["upscale"][1]
elif type == "gfpgan":
image["type"] = "gfpgan"
image["strength"] = parameters["gfpgan_strength"]
image["strength"] = parameters["facetool_strength"]
else:
raise TypeError(f"Invalid type: {type}")
@ -493,6 +493,7 @@ def parameters_to_generated_image_metadata(parameters):
"height",
"extra",
"seamless",
"hires_fix",
]
rfc_dict = {}
@ -505,10 +506,10 @@ def parameters_to_generated_image_metadata(parameters):
postprocessing = []
# 'postprocessing' is either null or an
if "gfpgan_strength" in parameters:
if "facetool_strength" in parameters:
postprocessing.append(
{"type": "gfpgan", "strength": float(parameters["gfpgan_strength"])}
{"type": "gfpgan", "strength": float(parameters["facetool_strength"])}
)
if "upscale" in parameters:
@ -751,7 +752,7 @@ def generate_images(generation_parameters, esrgan_parameters, gfpgan_parameters)
image=image, strength=gfpgan_parameters["strength"], seed=seed
)
postprocessing = True
all_parameters["gfpgan_strength"] = gfpgan_parameters["strength"]
all_parameters["facetool_strength"] = gfpgan_parameters["strength"]
progress["currentStatus"] = "Saving image"
socketio.emit("progressUpdate", progress)


@ -1,18 +1,22 @@
# This file describes the alternative machine learning models
# available to the dream script.
# available to the dream script.
#
# To add a new model, follow the examples below. Each
# model requires a model config file, a weights file,
# and the width and height of the images it
# was trained on.
laion400m:
config: configs/latent-diffusion/txt2img-1p4B-eval.yaml
weights: models/ldm/text2img-large/model.ckpt
width: 256
height: 256
stable-diffusion-1.4:
config: configs/stable-diffusion/v1-inference.yaml
weights: models/ldm/stable-diffusion-v1/model.ckpt
width: 512
height: 512
config: configs/stable-diffusion/v1-inference.yaml
weights: models/ldm/stable-diffusion-v1/model.ckpt
# vae: models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
description: Stable Diffusion inference model version 1.4
width: 512
height: 512
default: true
stable-diffusion-1.5:
config: configs/stable-diffusion/v1-inference.yaml
weights: models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt
# vae: models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
description: Stable Diffusion inference model version 1.5
width: 512
height: 512


@ -4,6 +4,12 @@ title: Changelog
# :octicons-log-16: **Changelog**
## v2.0.1 (13 October 2022)
- fix noisy images at high step count when using k* samplers
- dream.py script now calls invoke.py module directly rather than
via a new python process (which could break the environment)
## v2.0.0 <small>(9 October 2022)</small>
- `dream.py` script renamed `invoke.py`. A `dream.py` script wrapper remains

(Six new binary image files added, not shown: 519 KiB, 11 KiB, 519 KiB, 439 KiB, 338 KiB and 59 KiB.)


@ -8,7 +8,7 @@ hide:
## **Interactive Command Line Interface**
The `invoke.py` script, located in `scripts/dream.py`, provides an interactive
The `invoke.py` script, located in `scripts/`, provides an interactive
interface to image generation similar to the "invoke mothership" bot that Stability
AI provided on its Discord server.
@ -85,6 +85,8 @@ overridden on a per-prompt basis (see [List of prompt arguments](#list-of-prompt
| `--from_file <path>` | | `None` | Read list of prompts from a file. Use `-` to read from standard input |
| `--model <modelname>` | | `stable-diffusion-1.4` | Loads model specified in configs/models.yaml. Currently one of "stable-diffusion-1.4" or "laion400m" |
| `--full_precision` | `-F` | `False` | Run in slower full-precision mode. Needed for Macintosh M1/M2 hardware and some older video cards. |
| `--png_compression <0-9>` | `-z<0-9>` | 6 | Select level of compression for output files, from 0 (no compression) to 9 (max compression) |
| `--safety-checker` | | False | Activate safety checker for NSFW and other potentially disturbing imagery |
| `--web` | | `False` | Start in web server mode |
| `--host <ip addr>` | | `localhost` | Which network interface web server should listen on. Set to 0.0.0.0 to listen on any. |
| `--port <port>` | | `9090` | Which port web server should listen for requests on. |
@ -96,7 +98,6 @@ overridden on a per-prompt basis (see [List of prompt arguments](#list-of-prompt
| `--embedding_path <path>` | | `None` | Path to pre-trained embedding manager checkpoints, for custom models |
| `--gfpgan_dir` | | `src/gfpgan` | Path to where GFPGAN is installed. |
| `--gfpgan_model_path` | | `experiments/pretrained_models/GFPGANv1.4.pth` | Path to GFPGAN model file, relative to `--gfpgan_dir`. |
| `--device <device>` | `-d<device>` | `torch.cuda.current_device()` | Device to run SD on, e.g. "cuda:0" |
| `--free_gpu_mem` | | `False` | Free GPU memory after sampling, to allow image decoding and saving in low VRAM conditions |
| `--precision` | | `auto` | Set model precision, default is selected by device. Options: auto, float32, float16, autocast |
@ -144,46 +145,48 @@ Here are the invoke> command that apply to txt2img:
| Argument <img width="680" align="right"/> | Shortcut <img width="420" align="right"/> | Default <img width="480" align="right"/> | Description |
|--------------------|------------|---------------------|--------------|
| `"my prompt"` | | | Text prompt to use. The quotation marks are optional. |
| `--width <int>` | `-W<int>` | `512` | Width of generated image |
| `--height <int>` | `-H<int>` | `512` | Height of generated image |
| `--iterations <int>` | `-n<int>` | `1` | How many images to generate from this prompt |
| `--steps <int>` | `-s<int>` | `50` | How many steps of refinement to apply |
| `--cfg_scale <float>`| `-C<float>` | `7.5` | How hard to try to match the prompt to the generated image; any number greater than 1.0 works, but the useful range is roughly 5.0 to 20.0 |
| `--seed <int>` | `-S<int>` | `None` | Set the random seed for the next series of images. This can be used to recreate an image generated previously.|
| `--sampler <sampler>`| `-A<sampler>`| `k_lms` | Sampler to use. Use -h to get list of available samplers. |
| `--hires_fix` | | | Larger images often have duplication artefacts. This option suppresses duplicates by generating the image at low res, and then using img2img to increase the resolution |
| `--grid` | `-g` | `False` | Turn on grid mode to return a single image combining all the images generated by this prompt |
| `--individual` | `-i` | `True` | Turn off grid mode (deprecated; leave off `--grid` instead) |
| `--outdir <path>` | `-o<path>` | `outputs/img_samples` | Temporarily change the location of these images |
| `--seamless` | | `False` | Activate seamless tiling for interesting effects |
| `--log_tokenization` | `-t` | `False` | Display a color-coded list of the parsed tokens derived from the prompt |
| `--skip_normalization`| `-x` | `False` | Weighted subprompts will not be normalized. See [Weighted Prompts](./OTHER.md#weighted-prompts) |
| `--upscale <int> <float>` | `-U <int> <float>` | `-U 1 0.75`| Upscale image by magnification factor (2, 4), and set strength of upscaling (0.0-1.0). If strength not set, will default to 0.75. |
| `--gfpgan_strength <float>` | `-G <float>` | `-G0` | Fix faces using the GFPGAN algorithm; argument indicates how hard the algorithm should try (0.0-1.0) |
| `--save_original` | `-save_orig`| `False` | When upscaling or fixing faces, this will cause the original image to be saved rather than replaced. |
| `--variation <float>` |`-v<float>`| `0.0` | Add a bit of noise (0.0=none, 1.0=high) to the image in order to generate a series of variations. Usually used in combination with `-S<seed>` and `-n<int>` to generate a series a riffs on a starting image. See [Variations](./VARIATIONS.md). |
| `--with_variations <pattern>` | `-V<pattern>`| `None` | Combine two or more variations. See [Variations](./VARIATIONS.md) for now to use this. |
| "my prompt" | | | Text prompt to use. The quotation marks are optional. |
| --width <int> | -W<int> | 512 | Width of generated image |
| --height <int> | -H<int> | 512 | Height of generated image |
| --iterations <int> | -n<int> | 1 | How many images to generate from this prompt |
| --steps <int> | -s<int> | 50 | How many steps of refinement to apply |
| --cfg_scale <float>| -C<float> | 7.5 | How hard to try to match the prompt to the generated image; any number greater than 1.0 works, but the useful range is roughly 5.0 to 20.0 |
| --seed <int> | -S<int> | None | Set the random seed for the next series of images. This can be used to recreate an image generated previously.|
| --sampler <sampler>| -A<sampler>| k_lms | Sampler to use. Use -h to get list of available samplers. |
| --hires_fix | | | Larger images often have duplication artefacts. This option suppresses duplicates by generating the image at low res, and then using img2img to increase the resolution |
| --png_compression <0-9> | -z<0-9> | 6 | Select level of compression for output files, from 0 (no compression) to 9 (max compression) |
| --grid | -g | False | Turn on grid mode to return a single image combining all the images generated by this prompt |
| --individual | -i | True | Turn off grid mode (deprecated; leave off --grid instead) |
| --outdir <path> | -o<path> | outputs/img_samples | Temporarily change the location of these images |
| --seamless | | False | Activate seamless tiling for interesting effects |
| --seamless_axes | | x,y | Specify which axes to use circular convolution on. |
| --log_tokenization | -t | False | Display a color-coded list of the parsed tokens derived from the prompt |
| --skip_normalization| -x | False | Weighted subprompts will not be normalized. See [Weighted Prompts](./OTHER.md#weighted-prompts) |
| --upscale <int> <float> | -U <int> <float> | -U 1 0.75| Upscale image by magnification factor (2, 4), and set strength of upscaling (0.0-1.0). If strength not set, will default to 0.75. |
| --facetool_strength <float> | -G <float> | -G0 | Fix faces (defaults to using the GFPGAN algorithm); argument indicates how hard the algorithm should try (0.0-1.0) |
| --facetool <name> | -ft <name> | -ft gfpgan | Select face restoration algorithm to use: gfpgan, codeformer |
| --codeformer_fidelity | -cf <float> | 0.75 | Used along with CodeFormer. Takes values between 0 and 1. 0 produces high quality but low accuracy. 1 produces high accuracy but low quality |
| --save_original | -save_orig| False | When upscaling or fixing faces, this will cause the original image to be saved rather than replaced. |
| --variation <float> |-v<float>| 0.0 | Add a bit of noise (0.0=none, 1.0=high) to the image in order to generate a series of variations. Usually used in combination with -S<seed> and -n<int> to generate a series of riffs on a starting image. See [Variations](./VARIATIONS.md). |
| --with_variations <pattern> | | None | Combine two or more variations. See [Variations](./VARIATIONS.md) for how to use this. |
| --save_intermediates <n> | | None | Save the image from every nth step into an "intermediates" folder inside the output directory |
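For instance, several of the face-restoration and upscaling flags above can be combined in one prompt (an illustrative example; the prompt and values are arbitrary):

```bash
invoke> portrait of an old sea captain -s 50 -ft codeformer -G 0.9 -cf 0.6 -U 2 0.75
```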
!!! note
Note that the width and height of the image must be multiples of
64. You can provide different values, but they will be rounded down to
the nearest multiple of 64.
The width and height of the image must be multiples of
64. You can provide different values, but they will be rounded down to
the nearest multiple of 64.
### img2img
### This is an example of img2img:
!!! example
~~~~
invoke> waterfall and rainbow -I./vacation-photo.png -W640 -H480 --fit
~~~~
```bash
invoke> waterfall and rainbow -I./vacation-photo.png -W640 -H480 --fit
```
This will modify the indicated vacation photograph by making it more
like the prompt. Results will vary greatly depending on what is in the
image. We also ask to `--fit` the image into a box no bigger than
640x480. Otherwise the image size will be identical to the provided
photo and you may run out of memory if it is large.
This will modify the indicated vacation photograph by making it more
like the prompt. Results will vary greatly depending on what is in the
image. We also ask to --fit the image into a box no bigger than
640x480. Otherwise the image size will be identical to the provided
photo and you may run out of memory if it is large.
In addition to the command-line options recognized by txt2img, img2img
accepts additional options:
@ -210,16 +213,44 @@ accepts additional options:
[Inpainting](./INPAINTING.md) for details.
inpainting accepts all the arguments used for txt2img and img2img, as
well as the --mask (-M) argument:
well as the --mask (-M) and --text_mask (-tm) arguments:
| Argument <img width="100" align="right"/> | Shortcut | Default | Description |
|--------------------|------------|---------------------|--------------|
| `--init_mask <path>` | `-M<path>` | `None` |Path to an image the same size as the initial_image, with areas for inpainting made transparent.|
| `--text_mask <prompt> [<float>]` | `-tm <prompt> [<float>]` | <none> | Create a mask from a text prompt describing part of the image|
## Convenience commands
`--text_mask` (short form `-tm`) is a way to generate a mask using a
text description of the part of the image to replace. For example, if
you have an image of a breakfast plate with a bagel, toast and
scrambled eggs, you can selectively mask the bagel and replace it with
a piece of cake this way:
In addition to the standard image generation arguments, there are a
series of convenience commands that begin with !:
~~~
invoke> a piece of cake -I /path/to/breakfast.png -tm bagel
~~~
The algorithm uses <a
href="https://github.com/timojl/clipseg">clipseg</a> to classify
different regions of the image. The classifier puts out a confidence
score for each region it identifies. Generally regions that score
above 0.5 are reliable, but if you are getting too much or too little
masking you can adjust the threshold down (to get more mask), or up
(to get less). In this example, by passing `-tm` a higher value, we
are insisting on a more stringent classification.
~~~
invoke> a piece of cake -I /path/to/breakfast.png -tm bagel 0.6
~~~
# Other Commands
The CLI offers a number of commands that begin with "!".
## Postprocessing images
To postprocess a file using face restoration or upscaling, use the
`!fix` command.
### `!fix`
@ -252,29 +283,171 @@ Some examples:
Outputs:
[1] outputs/img-samples/000017.4829112.gfpgan-00.png: !fix "outputs/img-samples/0000045.4829112.png" -s 50 -S -W 512 -H 512 -C 7.5 -A k_lms -G 0.8
### !mask
This command takes an image, a text prompt, and uses the `clipseg`
algorithm to automatically generate a mask of the area that matches
the text prompt. It is useful for debugging the text masking process
prior to inpainting with the `--text_mask` argument. See
[INPAINTING.md] for details.
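A minimal illustration (the same invocation appears in the inpainting documentation; the image path is only an example):

```bash
invoke> !mask ./test-pictures/curly.png -tm hair 0.5
```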
## Model selection and importation
The CLI allows you to add new models on the fly, as well as to switch
among them rapidly without leaving the script.
### !models
This prints out a list of the models defined in `config/models.yaml`.
The active model is bold-faced.
Example:
<pre>
laion400m not loaded <no description>
<b>stable-diffusion-1.4 active Stable Diffusion v1.4</b>
waifu-diffusion not loaded Waifu Diffusion v1.3
</pre>
### !switch <model>
This quickly switches from one model to another without leaving the
CLI script. `invoke.py` uses a memory caching system; once a model
has been loaded, switching back and forth is quick. The following
example shows this in action. Note how the second column of the
`!models` table changes to `cached` after a model is first loaded,
and that the long initialization step is not needed when loading
a cached model.
<pre>
invoke> !models
laion400m not loaded <no description>
<b>stable-diffusion-1.4 cached Stable Diffusion v1.4</b>
waifu-diffusion active Waifu Diffusion v1.3
invoke> !switch waifu-diffusion
>> Caching model stable-diffusion-1.4 in system RAM
>> Loading waifu-diffusion from models/ldm/stable-diffusion-v1/model-epoch08-float16.ckpt
| LatentDiffusion: Running in eps-prediction mode
| DiffusionWrapper has 859.52 M params.
| Making attention of type 'vanilla' with 512 in_channels
| Working with z of shape (1, 4, 32, 32) = 4096 dimensions.
| Making attention of type 'vanilla' with 512 in_channels
| Using faster float16 precision
>> Model loaded in 18.24s
>> Max VRAM used to load the model: 2.17G
>> Current VRAM usage:2.17G
>> Setting Sampler to k_lms
invoke> !models
laion400m not loaded <no description>
stable-diffusion-1.4 cached Stable Diffusion v1.4
<b>waifu-diffusion active Waifu Diffusion v1.3</b>
invoke> !switch stable-diffusion-1.4
>> Caching model waifu-diffusion in system RAM
>> Retrieving model stable-diffusion-1.4 from system RAM cache
>> Setting Sampler to k_lms
invoke> !models
laion400m not loaded <no description>
<b>stable-diffusion-1.4 active Stable Diffusion v1.4</b>
waifu-diffusion cached Waifu Diffusion v1.3
</pre>
### !import_model <path/to/model/weights>
This command imports a new model weights file into InvokeAI, makes it
available for image generation within the script, and writes out the
configuration for the model into `config/models.yaml` for use in
subsequent sessions.
Provide `!import_model` with the path to a weights file ending in
`.ckpt`. If you type a partial path and press tab, the CLI will
autocomplete. Although it will also autocomplete to `.vae` files,
these are not currently supported (but will be soon).
When you hit return, the CLI will prompt you to fill in additional
information about the model, including the short name you wish to use
for it with the `!switch` command, a brief description of the model,
the default image width and height to use with this model, and the
model's configuration file. The latter three fields are automatically
filled with reasonable defaults. In the example below, the bold-faced
text shows what the user typed in with the exception of the width,
height and configuration file paths, which were filled in
automatically.
Example:
<pre>
invoke> <b>!import_model models/ldm/stable-diffusion-v1/ model-epoch08-float16.ckpt</b>
>> Model import in process. Please enter the values needed to configure this model:
Name for this model: <b>waifu-diffusion</b>
Description of this model: <b>Waifu Diffusion v1.3</b>
Configuration file for this model: <b>configs/stable-diffusion/v1-inference.yaml</b>
Default image width: <b>512</b>
Default image height: <b>512</b>
>> New configuration:
waifu-diffusion:
config: configs/stable-diffusion/v1-inference.yaml
description: Waifu Diffusion v1.3
height: 512
weights: models/ldm/stable-diffusion-v1/model-epoch08-float16.ckpt
width: 512
OK to import [n]? <b>y</b>
>> Caching model stable-diffusion-1.4 in system RAM
>> Loading waifu-diffusion from models/ldm/stable-diffusion-v1/model-epoch08-float16.ckpt
| LatentDiffusion: Running in eps-prediction mode
| DiffusionWrapper has 859.52 M params.
| Making attention of type 'vanilla' with 512 in_channels
| Working with z of shape (1, 4, 32, 32) = 4096 dimensions.
| Making attention of type 'vanilla' with 512 in_channels
| Using faster float16 precision
invoke>
</pre>
### !edit_model <name_of_model>
The `!edit_model` command can be used to modify a model that is
already defined in `config/models.yaml`. Call it with the short
name of the model you wish to modify, and it will allow you to
modify the model's `description`, `weights` and other fields.
Example:
<pre>
invoke> <b>!edit_model waifu-diffusion</b>
>> Editing model waifu-diffusion from configuration file ./configs/models.yaml
description: <b>Waifu diffusion v1.4beta</b>
weights: models/ldm/stable-diffusion-v1/<b>model-epoch10-float16.ckpt</b>
config: configs/stable-diffusion/v1-inference.yaml
width: 512
height: 512
>> New configuration:
waifu-diffusion:
config: configs/stable-diffusion/v1-inference.yaml
description: Waifu diffusion v1.4beta
weights: models/ldm/stable-diffusion-v1/model-epoch10-float16.ckpt
height: 512
width: 512
OK to import [n]? y
>> Caching model stable-diffusion-1.4 in system RAM
>> Loading waifu-diffusion from models/ldm/stable-diffusion-v1/model-epoch10-float16.ckpt
...
</pre>
invoke> !fix 000017.4829112.gfpgan-00.png --embiggen 3
...lots of text...
Outputs:
[2] outputs/img-samples/000018.2273800735.embiggen-00.png: !fix "outputs/img-samples/000017.243781548.gfpgan-00.png" -s 50 -S 2273800735 -W 512 -H 512 -C 7.5 -A k_lms --embiggen 3.0 0.75 0.25
```
## History processing

The CLI provides a series of convenient commands for reviewing previous
actions, retrieving them, modifying them, and re-running them.

### `!fetch`

This command retrieves the generation parameters from a previously
generated image and loads them into the command line. You may
provide either the name of a file in the current output directory, or
a full file path.
```bash
invoke> !fetch 0000015.8929913.png
# the script returns the next line, ready for editing and running:
invoke> a fantastic alien landscape -W 576 -H 512 -s 60 -A plms -C 7.5
```
Note that this command may behave unexpectedly if given a PNG file that
was not generated by InvokeAI.
### `!history`
### !history
The invoke script keeps track of all the commands you issue during a
session, allowing you to re-run them. On Mac and Linux systems, it
@ -299,7 +472,44 @@ invoke> !20
invoke> watercolor of beautiful woman sitting under tree wearing broad hat and flowing garment -v0.2 -n6 -S2878767194
```
### `!search <search string>`
### !fetch
This command retrieves the generation parameters from a previously
generated image and either loads them into the command line
(Linux|Mac), or prints them out in a comment for copy-and-paste
(Windows). You may provide either the name of a file in the current
output directory, or a full file path. You may also specify the path
to a folder of PNG image files together with the wildcard *.png to
retrieve the commands used to generate the images and save them to a
text file (such as commands.txt) for further processing.
This example loads the generation command for a single png file:
```bash
invoke> !fetch 0000015.8929913.png
# the script returns the next line, ready for editing and running:
invoke> a fantastic alien landscape -W 576 -H 512 -s 60 -A plms -C 7.5
```
This one fetches the generation commands from a batch of files and
stores them into `selected.txt`:
```bash
invoke> !fetch outputs\selected-imgs\*.png selected.txt
```
### !replay
This command replays a text file generated by !fetch or created manually.
~~~
invoke> !replay outputs\selected-imgs\selected.txt
~~~
Note that these commands may behave unexpectedly if given a PNG file that
was not generated by InvokeAI.
### !search <search string>
This is similar to !history but it only returns lines that contain
`search string`. For example:
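A hypothetical invocation (assuming the session history contains the watercolor prompt shown in the !history example above):

```bash
invoke> !search watercolor
```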


@ -59,16 +59,13 @@ information underneath the transparent needs to be preserved, not erased.
!!! warning
`img2img` does not work properly on initial images smaller than 512x512. Please scale your
image to at least 512x512 before using it. Larger images are not a problem, but may run out of VRAM on your
GPU card.
To fix this, use the `--fit` option, which downscales the initial image to fit within the box specified
by width x height:
```bash
invoke> "tree on a hill with a river, national geographic" -I./test-pictures/big-sketch.png -H512 -W512 --fit
```
**IMPORTANT ISSUE** `img2img` does not work properly on initial images smaller than 512x512. Please scale your
image to at least 512x512 before using it. Larger images are not a problem, but may run out of VRAM on your
GPU card. To fix this, use the --fit option, which downscales the initial image to fit within the box specified
by width x height:
~~~
tree on a hill with a river, national geographic -I./test-pictures/big-sketch.png -H512 -W512 --fit
~~~
## How does it actually work, though?
@ -78,7 +75,7 @@ gaussian noise and progressively refines it over the requested number of steps,
**Let's start** by thinking about vanilla `prompt2img`, just generating an image from a prompt. If the step count is 10, then the "latent space" (Stable Diffusion's internal representation of the image) for the prompt "fire" with seed `1592514025` develops something like this:
```bash
```commandline
invoke> "fire" -s10 -W384 -H384 -S1592514025
```
@ -113,9 +110,9 @@ With strength `0.4`, the steps look more like this:
Notice how much more fuzzy the starting image is for strength `0.7` compared to `0.4`, and notice also how much longer the sequence is with `0.7`:
| | strength = 0.7 | strength = 0.4 |
| -- | :--: | :--: |
| initial image that SD sees | ![step-0-32](../assets/img2img/000032.step-0.png) | ![step-0-30](../assets/img2img/000030.step-0.png) |
| steps argument to `dream>` | `-S10` | `-S10` |
| -- | -- | -- |
| initial image that SD sees | ![](../assets/img2img/000032.step-0.png) | ![](../assets/img2img/000030.step-0.png) |
| steps argument to `invoke>` | `-S10` | `-S10` |
| steps actually taken | 7 | 4 |
| latent space at each step | ![gravity32](../assets/img2img/000032.steps.gravity.png) | ![gravity30](../assets/img2img/000030.steps.gravity.png) |
| output | ![000032.1592514025](../assets/img2img/000032.1592514025.png) | ![000030.1592514025](../assets/img2img/000030.1592514025.png) |
@ -124,11 +121,13 @@ Both of the outputs look kind of like what I was thinking of. With the strength
If you want to try this out yourself, all of these are using a seed of `1592514025` with a width/height of `384`, step count `10`, the default sampler (`k_lms`), and the single-word prompt `"fire"`:
```bash
If you want to try this out yourself, all of these are using a seed of `1592514025` with a width/height of `384`, step count `10`, the default sampler (`k_lms`), and the single-word prompt `fire`:
```commandline
invoke> "fire" -s10 -W384 -H384 -S1592514025 -I /tmp/fire-drawing.png --strength 0.7
```
The code for rendering intermediates is on my (damian0815's) branch [document-img2img](https://github.com/damian0815/InvokeAI/tree/document-img2img) - run `invoke.py` and check your `outputs/img-samples/intermediates` folder while generating an image.
The code for rendering intermediates is on my (damian0815's) branch [document-img2img](https://github.com/damian0815/InvokeAI/tree/document-img2img) - run `invoke.py` and check your `outputs/img-samples/intermediates` folder while generating an image.
### Compensating for the reduced step count
@ -136,7 +135,7 @@ After putting this guide together I was curious to see how the difference would
Here's strength `0.4` (note step count `50`, which is `20 ÷ 0.4` to make sure SD does `20` steps from my image):
```bash
```commandline
invoke> "fire" -s50 -W384 -H384 -S1592514025 -I /tmp/fire-drawing.png -f 0.4
```
@ -146,7 +145,7 @@ invoke> "fire" -s50 -W384 -H384 -S1592514025 -I /tmp/fire-drawing.png -f 0.4
and here is strength `0.7` (note step count `30`, which is roughly `20 ÷ 0.7` to make sure SD does `20` steps from my image):
```bash
```commandline
invoke> "fire" -s30 -W384 -H384 -S1592514025 -I /tmp/fire-drawing.png -f 0.7
```


@ -6,27 +6,158 @@ title: Inpainting
## **Creating Transparent Regions for Inpainting**
Inpainting is really cool. To do it, you start with an initial image and use a photoeditor to make
one or more regions transparent (i.e. they have a "hole" in them). You then provide the path to this
image at the invoke> command line using the `-I` switch. Stable Diffusion will only paint within the
transparent region.
Inpainting is really cool. To do it, you start with an initial image
and use a photoeditor to make one or more regions transparent
(i.e. they have a "hole" in them). You then provide the path to this
image at the invoke> command line using the `-I` switch. Stable
Diffusion will only paint within the transparent region.
There's a catch. In the current implementation, you have to prepare the initial image correctly so
that the underlying colors are preserved under the transparent area. Many imaging editing
applications will by default erase the color information under the transparent pixels and replace
them with white or black, which will lead to suboptimal inpainting. You also must take care to
export the PNG file in such a way that the color information is preserved.
There's a catch. In the current implementation, you have to prepare
the initial image correctly so that the underlying colors are
preserved under the transparent area. Many image editing
applications will by default erase the color information under the
transparent pixels and replace them with white or black, which will
lead to suboptimal inpainting. It often helps to apply incomplete
transparency, such as any value between 1 and 99%.
If your photoeditor is erasing the underlying color information, `invoke.py` will give you a big fat
warning. If you can't find a way to coax your photoeditor to retain color values under transparent
areas, then you can combine the `-I` and `-M` switches to provide both the original unedited image
and the masked (partially transparent) image:
You also must take care to export the PNG file in such a way that the
color information is preserved. There is often an option in the export
dialog that lets you specify this.
If your photoeditor is erasing the underlying color information,
`invoke.py` will give you a big fat warning. If you can't find a way to
coax your photoeditor to retain color values under transparent areas,
then you can combine the `-I` and `-M` switches to provide both the
original unedited image and the masked (partially transparent) image:
```bash
invoke> "man with cat on shoulder" -I./images/man.png -M./images/man-transparent.png
```
We are hoping to get rid of the need for this workaround in an upcoming release.
## **Masking using Text**
You can also create a mask using a text prompt to select the part of
the image you want to alter, using the <a
href="https://github.com/timojl/clipseg">clipseg</a> algorithm. This
works on any image, not just ones generated by InvokeAI.
The `--text_mask` (short form `-tm`) option takes two arguments. The
first argument is a text description of the part of the image you wish
to mask (paint over). If the text description contains a space, you must
surround it with quotation marks. The optional second argument is the
minimum threshold for the mask classifier's confidence score, described
in more detail below.
To see how this works in practice, here's an image of a still life
painting that I got off the web.
<img src="../assets/still-life-scaled.jpg">
You can selectively mask out the
orange and replace it with a baseball in this way:
~~~
invoke> a baseball -I /path/to/still_life.png -tm orange
~~~
<img src="../assets/still-life-inpainted.png">
The clipseg classifier produces a confidence score for each region it
identifies. Generally regions that score above 0.5 are reliable, but
if you are getting too much or too little masking you can adjust the
threshold down (to get more mask), or up (to get less). In this
example, by passing `-tm` a higher value, we are insisting on a tighter
mask. However, if you make it too high, the orange may not be picked
up at all!
~~~
invoke> a baseball -I /path/to/still_life.png -tm orange 0.6
~~~
The `!mask` command may be useful for debugging problems with the
text2mask feature. The syntax is `!mask /path/to/image.png -tm <text>
<threshold>`
It will generate three files:
- The image with the selected area highlighted.
- it will be named XXXXX.<imagename>.<prompt>.selected.png
- The image with the un-selected area highlighted.
- it will be named XXXXX.<imagename>.<prompt>.deselected.png
- The image with the selected area converted into a black and white
image according to the threshold level
- it will be named XXXXX.<imagename>.<prompt>.masked.png
The `.masked.png` file can then be directly passed to the `invoke>`
prompt in the CLI via the `-M` argument. Do not attempt this with
the `selected.png` or `deselected.png` files, as they contain some
transparency throughout the image and will not produce the desired
results.
Here is an example of how `!mask` works:
```
invoke> !mask ./test-pictures/curly.png -tm hair 0.5
>> generating masks from ./test-pictures/curly.png
>> Initializing clipseg model for text to mask inference
Outputs:
[941.1] outputs/img-samples/000019.curly.hair.deselected.png: !mask ./test-pictures/curly.png -tm hair 0.5
[941.2] outputs/img-samples/000019.curly.hair.selected.png: !mask ./test-pictures/curly.png -tm hair 0.5
[941.3] outputs/img-samples/000019.curly.hair.masked.png: !mask ./test-pictures/curly.png -tm hair 0.5
```
**Original image "curly.png"**
<img src="../assets/outpainting/curly.png">
**000019.curly.hair.selected.png**
<img src="../assets/inpainting/000019.curly.hair.selected.png">
**000019.curly.hair.deselected.png**
<img src="../assets/inpainting/000019.curly.hair.deselected.png">
**000019.curly.hair.masked.png**
<img src="../assets/inpainting/000019.curly.hair.masked.png">
It looks like we selected the hair pretty well at the 0.5 threshold
(which is the default, so we didn't actually have to specify it), so
let's have some fun:
```
invoke> medusa with cobras -I ./test-pictures/curly.png -M 000019.curly.hair.masked.png -C20
>> loaded input image of size 512x512 from ./test-pictures/curly.png
...
Outputs:
[946] outputs/img-samples/000024.801380492.png: "medusa with cobras" -s 50 -S 801380492 -W 512 -H 512 -C 20.0 -I ./test-pictures/curly.png -A k_lms -f 0.75
```
<img src="../assets/inpainting/000024.801380492.png">
You can also skip the `!mask` creation step and just select the masked
region directly:
```
invoke> medusa with cobras -I ./test-pictures/curly.png -tm hair -C20
```
### Inpainting is not changing the masked region enough!
One of the things to understand about how inpainting works is that it
is equivalent to running img2img on just the masked (transparent)
area. img2img builds on top of the existing image data, and therefore
will attempt to preserve colors, shapes and textures to the best of
its ability. Unfortunately this means that if you want to make a
dramatic change in the inpainted region, for example replacing a red
wall with a blue one, the algorithm will fight you.
You have a couple of options. The first is to increase the values of
the requested steps (`-sXXX`), strength (`-f0.XX`), and/or
classifier-free guidance (`-CXX.X`). If this is not working for you, a
more extreme step is to provide the `--inpaint_replace 0.X` (`-r0.X`)
option. This value ranges from 0.0 to 1.0. The higher it is the less
attention the algorithm will pay to the data underneath the masked
region. At high values this will enable you to replace colored regions
entirely, but beware that the masked region may not blend in with the
surrounding unmasked regions as well.
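A hedged sketch of pushing a more dramatic change through (hypothetical image and mask paths; the values are only starting points to experiment with):

```bash
invoke> a bright blue wall -I ./room.png -M ./room-mask.png -s 100 -f 0.9 -C 15 --inpaint_replace 0.7
```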
---
@ -35,10 +166,10 @@ We are hoping to get rid of the need for this workaround in an upcoming release.
[GIMP](https://www.gimp.org/) is a popular Linux photoediting tool.
1. Open image in GIMP.
2. Layer --> Transparency --> Add Alpha Channel
3. Use lasoo tool to select region to mask
4. Choose Select --> Float to create a floating selection
5. Open the Layers toolbar (++ctrl+l++) and select "Floating Selection"
2. Layer->Transparency->Add Alpha Channel
3. Use lasso tool to select region to mask
4. Choose Select -> Float to create a floating selection
5. Open the Layers toolbar (^L) and select "Floating Selection"
6. Set opacity to a value between 0% and 99%
7. Export as PNG
8. In the export dialogue, Make sure the "Save colour values from
@ -58,7 +189,7 @@ We are hoping to get rid of the need for this workaround in an upcoming release.
3. Because we'll be applying a mask over the area we want to preserve, you should now select the inverse by using the ++shift+ctrl+i++ shortcut, or right clicking and using the "Select Inverse" option.
4. You'll now create a mask by selecting the image layer, and Masking the selection. Make sure that you don't delete any of the undrlying image, or your inpainting results will be dramatically impacted.
4. You'll now create a mask by selecting the image layer, and Masking the selection. Make sure that you don't delete any of the underlying image, or your inpainting results will be dramatically impacted.
<div align="center" markdown>![step4](../assets/step4.png)</div>


@ -26,6 +26,12 @@ for each `invoke>` prompt as shown here:
invoke> "pond garden with lotus by claude monet" --seamless -s100 -n4
```
By default this will tile on both the X and Y axes. However, you can also specify specific axes to tile on with `--seamless_axes`.
Possible values are `x`, `y`, and `x,y`:
```bash
invoke> "pond garden with lotus by claude monet" --seamless --seamless_axes=x -s100 -n4
```
---
## **Shortcuts: Reusing Seeds**
@ -69,6 +75,23 @@ combination of integers and floating point numbers, and they do not need to add
---
## **Filename Format**
The argument `--fnformat` allows you to specify the filename of the
image. Supported wildcards are all arguments that can be set, such as
`perlin`, `seed`, `threshold`, `height`, `width`, `gfpgan_strength`,
`sampler_name`, `steps`, `model`, `upscale`, `prompt`, `cfg_scale`,
`prefix`.
The following prompt
```bash
invoke> a red car --steps 25 -C 9.8 --perlin 0.1 --fnformat {prompt}_steps.{steps}_cfg.{cfg_scale}_perlin.{perlin}.png
```
generates a file with the name: `outputs/img-samples/a red car_steps.25_cfg.9.8_perlin.0.1.png`
---
## **Thresholding and Perlin Noise Initialization Options**
Two new options are the thresholding (`--threshold`) and the perlin noise initialization (`--perlin`) options. Thresholding limits the range of the latent values during optimization, which helps combat oversaturation with higher CFG scale values. Perlin noise initialization starts with a percentage (a value ranging from 0 to 1) of perlin noise mixed into the initial noise. Both features allow for more variations and options in the course of generating images.
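An illustrative combination (the prompt and numeric values are arbitrary examples, not recommendations):

```bash
invoke> an oil painting of a mountain lake -s 50 -C 12.0 --threshold 0.8 --perlin 0.2
```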

View File

@ -70,7 +70,7 @@ If you do not explicitly specify an upscaling_strength, it will default to 0.75.
### Face Restoration
`-G : <gfpgan_strength>`
`-G : <facetool_strength>`
This prompt argument controls the strength of the face restoration being
applied. As with upscaling, values between `0.5` and `0.8` are recommended.
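For example, a hypothetical invocation applying moderate face restoration at generation time might look like this (the prompt is a placeholder):
```bash
# restore faces with a strength of 0.7, within the recommended 0.5-0.8 range
invoke> "photorealistic portrait of an astronaut" -s50 -G0.7
```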

View File

@ -12,7 +12,7 @@ title: Home
-->
<div align="center" markdown>
# ^^**InvokeAI: A Stable Diffusion Toolkit**^^ :tools: <br> <small>Formally known as lstein/stable-diffusion</small>
# ^^**InvokeAI: A Stable Diffusion Toolkit**^^ :tools: <br> <small>Formerly known as lstein/stable-diffusion</small>
![project logo](assets/logo.png)

View File

@ -51,7 +51,15 @@ While that is downloading, open Terminal and run the following commands one at a
brew install cmake protobuf rust
```
Then choose the kind of your Mac and install miniconda:
Then clone the InvokeAI repository:
```bash title="Clone the InvokeAI repository"
# Clone the Invoke AI repo
git clone https://github.com/invoke-ai/InvokeAI.git
cd InvokeAI
```
Choose the appropriate architecture for your system and install miniconda:
=== "M1 arm64"
@ -81,7 +89,7 @@ While that is downloading, open Terminal and run the following commands one at a
!!! todo "Clone the Invoke AI repo"
```bash
```bash
git clone https://github.com/invoke-ai/InvokeAI.git
cd InvokeAI
```
@ -178,7 +186,7 @@ conda install \
pytorch \
torchvision \
-c pytorch-nightly \
-n ldm
-n invokeai
```
If it takes forever to run `conda env create -f environment-mac.yml`, try this:
@ -202,11 +210,11 @@ conda update \
---
### "No module named cv2", torch, 'ldm', 'transformers', 'taming', etc
### "No module named cv2", torch, 'invokeai', 'transformers', 'taming', etc
There are several causes of these errors:
1. Did you remember to `conda activate ldm`? If your terminal prompt begins with
1. Did you remember to `conda activate invokeai`? If your terminal prompt begins with
"(invokeai)" then you activated it. If it begins with "(base)" or something else
you haven't.
@ -221,17 +229,17 @@ There are several causes of these errors:
```bash
conda deactivate
conda env remove -n ldm
conda env remove -n invokeai
conda env create -f environment-mac.yml
```
4. If you have activated the ldm virtual environment and tried rebuilding it,
4. If you have activated the invokeai virtual environment and tried rebuilding it,
the problem may be that I have something installed that you don't, and
you'll just need to install it manually. Make sure you activate the virtual
environment so it installs there instead of globally.
```bash
conda activate ldm
conda activate invokeai
pip install <package name>
```
@ -290,11 +298,11 @@ output of `python3 -V` and `python -V`.
```bash
(invokeai) % which python
/Users/name/miniforge3/envs/ldm/bin/python
/Users/name/miniforge3/envs/invokeai/bin/python
```
The above is what you'll see if you have miniforge and correctly activated the
ldm environment, while using the standalone setup instructions above.
invokeai environment, while using the standalone setup instructions above.
If you otherwise installed via pyenv, you will get this result:
@ -474,7 +482,7 @@ this issue too. I should probably test it.
### "view size is not compatible with input tensor's size and stride"
```bash
File "/opt/anaconda3/envs/ldm/lib/python3.10/site-packages/torch/nn/functional.py", line 2511, in layer_norm
File "/opt/anaconda3/envs/invokeai/lib/python3.10/site-packages/torch/nn/functional.py", line 2511, in layer_norm
return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.
```
@ -510,7 +518,7 @@ Generating: 0%| |
loc("mps_add"("(mpsFileLoc): /AppleInternal/Library/BuildRoots/20d6c351-ee94-11ec-bcaf-7247572f23b4/Library/Caches/com.apple.xbs/Sources/MetalPerformanceShadersGraph/mpsgraph/MetalPerformanceShadersGraph/Core/Files/MPSGraphUtilities.mm":219:0)): error: input types 'tensor<2x1280xf32>' and 'tensor<*xf16>' are not broadcast compatible
LLVM ERROR: Failed to infer result type(s).
Abort trap: 6
/Users/[...]/opt/anaconda3/envs/ldm/lib/python3.9/multiprocessing/resource_tracker.py:216: UserWarning: resource_tracker: There appear to be 1 leaked semaphore objects to clean up at shutdown
/Users/[...]/opt/anaconda3/envs/invokeai/lib/python3.9/multiprocessing/resource_tracker.py:216: UserWarning: resource_tracker: There appear to be 1 leaked semaphore objects to clean up at shutdown
warnings.warn('resource_tracker: There appear to be %d '
```

View File

@ -19,6 +19,7 @@ dependencies:
# ```
- albumentations==1.2.1
- coloredlogs==15.0.1
- diffusers==0.6.0
- einops==0.4.1
- grpcio==1.46.4
- humanfriendly==10.0
@ -47,16 +48,17 @@ dependencies:
- dependency_injector==4.40.0
- eventlet==0.33.1
- opencv-python==4.6.0
- protobuf==3.19.5
- protobuf==3.19.6
- realesrgan==0.2.5.0
- send2trash==1.8.0
- test-tube==0.7.5
- transformers==4.21.2
- transformers==4.21.3
- torch-fidelity==0.3.0
- -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers
- -e git+https://github.com/openai/CLIP.git@main#egg=clip
- -e git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion
- -e git+https://github.com/TencentARC/GFPGAN.git#egg=gfpgan
- -e git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg
- -e .
variables:
PYTORCH_ENABLE_MPS_FALLBACK: 1

View File

@ -15,7 +15,7 @@ dependencies:
- pudb==2019.2
- imageio==2.9.0
- imageio-ffmpeg==0.4.2
- pytorch-lightning==1.4.2
- pytorch-lightning==1.7.7
- omegaconf==2.1.1
- realesrgan==0.2.5.0
- test-tube>=0.7.5
@ -25,8 +25,9 @@ dependencies:
- einops==0.3.0
- pyreadline3
- torch-fidelity==0.3.0
- transformers==4.19.2
- torchmetrics==0.6.0
- transformers==4.21.3
- diffusers==0.6.0
- torchmetrics==0.7.0
- flask==2.1.3
- flask_socketio==5.3.0
- flask_cors==3.0.10
@ -37,4 +38,5 @@ dependencies:
- -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers
- -e git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion
- -e git+https://github.com/TencentARC/GFPGAN.git#egg=gfpgan
- -e git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg
- -e .

frontend/dist/assets/index.0a6593a2.js vendored Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -5,9 +5,9 @@
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>InvokeAI - A Stable Diffusion Toolkit</title>
<link rel="shortcut icon" type="icon" href="/assets/favicon.0d253ced.ico" />
<script type="module" crossorigin src="/assets/index.989a0ca2.js"></script>
<link rel="stylesheet" href="/assets/index.58175ea1.css">
<link rel="shortcut icon" type="icon" href="./assets/favicon.0d253ced.ico" />
<script type="module" crossorigin src="./assets/index.0a6593a2.js"></script>
<link rel="stylesheet" href="./assets/index.193aec6f.css">
</head>
<body>

View File

@ -14,6 +14,7 @@
"@chakra-ui/react": "^2.3.1",
"@emotion/react": "^11.10.4",
"@emotion/styled": "^11.10.4",
"@radix-ui/react-context-menu": "^2.0.1",
"@reduxjs/toolkit": "^1.8.5",
"@types/uuid": "^8.3.4",
"dateformat": "^5.0.3",
@ -25,7 +26,6 @@
"react-dropzone": "^14.2.2",
"react-hotkeys-hook": "^3.4.7",
"react-icons": "^4.4.0",
"react-masonry-css": "^1.0.16",
"react-redux": "^8.0.2",
"redux-persist": "^6.0.0",
"socket.io": "^4.5.2",

View File

@ -32,26 +32,8 @@ export const UPSCALING_LEVELS: Array<{ key: string; value: number }> = [
{ key: '4x', value: 4 },
];
// Internal to human-readable parameters
export const PARAMETERS: { [key: string]: string } = {
prompt: 'Prompt',
iterations: 'Iterations',
steps: 'Steps',
cfgScale: 'CFG Scale',
height: 'Height',
width: 'Width',
sampler: 'Sampler',
seed: 'Seed',
img2imgStrength: 'img2img Strength',
gfpganStrength: 'GFPGAN Strength',
upscalingLevel: 'Upscaling Level',
upscalingStrength: 'Upscaling Strength',
initialImagePath: 'Initial Image',
maskPath: 'Initial Image Mask',
shouldFitToWidthHeight: 'Fit Initial Image',
seamless: 'Seamless Tiling',
};
export const NUMPY_RAND_MIN = 0;
export const NUMPY_RAND_MAX = 4294967295;
export const FACETOOL_TYPES = ['gfpgan', 'codeformer'] as const;

View File

@ -14,10 +14,13 @@ export enum Feature {
FACE_CORRECTION,
IMAGE_TO_IMAGE,
}
/** For each tooltip in the UI, the below feature definitions & props will pull relevant information into the tooltip.
*
 * To-do: href & GuideImages are placeholders and are not currently utilized, but will be updated (along with the tooltip UI) as feature and UI development continues and we get a better idea of where things' "forever homes" will be.
*/
export const FEATURES: Record<Feature, FeatureHelpInfo> = {
[Feature.PROMPT]: {
text: 'This field will take all prompt text, including both content and stylistic terms. CLI Commands will not work in the prompt.',
text: 'This field will take all prompt text, including both content and stylistic terms. While weights can be included in the prompt, standard CLI Commands/parameters will not work.',
href: 'link/to/docs/feature3.html',
guideImage: 'asset/path.gif',
},
@ -27,17 +30,17 @@ export const FEATURES: Record<Feature, FeatureHelpInfo> = {
guideImage: 'asset/path.gif',
},
[Feature.OTHER]: {
text: 'Additional Options',
text: 'These options will enable alternative processing modes for Invoke. Seamless tiling will work to generate repeating patterns in the output. High Resolution Optimization performs a two-step generation cycle, and should be used at higher resolutions when you desire a more coherent image/composition. ',
href: 'link/to/docs/feature3.html',
guideImage: 'asset/path.gif',
},
[Feature.SEED]: {
text: 'Seed values provide an initial set of noise which guide the denoising process.',
text: 'Seed values provide an initial set of noise which guide the denoising process, and can be randomized or populated with a seed from a previous invocation. The Threshold feature can be used to mitigate undesirable outcomes at higher CFG values (try between 0-10), and Perlin can be used to add Perlin noise into the denoising process - Both serve to add variation to your outputs. ',
href: 'link/to/docs/feature3.html',
guideImage: 'asset/path.gif',
},
[Feature.VARIATIONS]: {
text: 'Try a variation with an amount of between 0 and 1 to change the output image for the set seed.',
text: 'Try a variation with an amount of between 0 and 1 to change the output image for the set seed - Interesting variations on the seed are found between 0.1 and 0.3.',
href: 'link/to/docs/feature3.html',
guideImage: 'asset/path.gif',
},
@ -47,8 +50,8 @@ export const FEATURES: Record<Feature, FeatureHelpInfo> = {
guideImage: 'asset/path.gif',
},
[Feature.FACE_CORRECTION]: {
text: 'Using GFPGAN or CodeFormer, Face Correction will attempt to identify faces in outputs, and correct any defects/abnormalities. Higher values will apply a stronger corrective pressure on outputs.',
href: 'link/to/docs/feature2.html',
text: 'Using GFPGAN or Codeformer, Face Correction will attempt to identify faces in outputs, and correct any defects/abnormalities. Higher strength values will apply a stronger corrective pressure on outputs, resulting in more appealing faces. With Codeformer, a higher fidelity will attempt to preserve the original image, at the expense of face correction strength.',
href: 'link/to/docs/feature3.html',
guideImage: 'asset/path.gif',
},
[Feature.IMAGE_TO_IMAGE]: {

View File

@ -55,6 +55,7 @@ export declare type CommonGeneratedImageMetadata = {
width: number;
height: number;
seamless: boolean;
hires_fix: boolean;
extra: null | Record<string, never>; // Pending development of RFC #266
};
@ -88,15 +89,16 @@ export declare type ESRGANMetadata = CommonPostProcessedImageMetadata & {
strength: number;
};
export declare type GFPGANMetadata = CommonPostProcessedImageMetadata & {
type: 'gfpgan';
export declare type FacetoolMetadata = CommonPostProcessedImageMetadata & {
type: 'gfpgan' | 'codeformer';
strength: number;
fidelity?: number;
};
// Superset of all postprocessed image metadata types.
export declare type PostProcessedImageMetadata =
| ESRGANMetadata
| GFPGANMetadata;
| FacetoolMetadata;
// Metadata includes the system config and image metadata.
export declare type Metadata = SystemConfig & {

View File

@ -10,7 +10,7 @@ import * as InvokeAI from '../invokeai';
export const generateImage = createAction<undefined>('socketio/generateImage');
export const runESRGAN = createAction<InvokeAI.Image>('socketio/runESRGAN');
export const runGFPGAN = createAction<InvokeAI.Image>('socketio/runGFPGAN');
export const runFacetool = createAction<InvokeAI.Image>('socketio/runFacetool');
export const deleteImage = createAction<InvokeAI.Image>('socketio/deleteImage');
export const requestImages = createAction<undefined>(
'socketio/requestImages'

View File

@ -26,18 +26,18 @@ const makeSocketIOEmitters = (
const options = { ...getState().options };
if (tabMap[options.activeTab] === 'txt2img') {
if (tabMap[options.activeTab] !== 'img2img') {
options.shouldUseInitImage = false;
}
const { generationParameters, esrganParameters, gfpganParameters } =
const { generationParameters, esrganParameters, facetoolParameters } =
frontendToBackendParameters(options, getState().system);
socketio.emit(
'generateImage',
generationParameters,
esrganParameters,
gfpganParameters
facetoolParameters
);
dispatch(
@ -46,7 +46,7 @@ const makeSocketIOEmitters = (
message: `Image generation requested: ${JSON.stringify({
...generationParameters,
...esrganParameters,
...gfpganParameters,
...facetoolParameters,
})}`,
})
);
@ -71,24 +71,32 @@ const makeSocketIOEmitters = (
})
);
},
emitRunGFPGAN: (imageToProcess: InvokeAI.Image) => {
emitRunFacetool: (imageToProcess: InvokeAI.Image) => {
dispatch(setIsProcessing(true));
const { gfpganStrength } = getState().options;
const { facetoolType, facetoolStrength, codeformerFidelity } =
getState().options;
const gfpganParameters = {
gfpgan_strength: gfpganStrength,
const facetoolParameters: Record<string, any> = {
facetool_strength: facetoolStrength,
};
if (facetoolType === 'codeformer') {
facetoolParameters.codeformer_fidelity = codeformerFidelity;
}
socketio.emit('runPostprocessing', imageToProcess, {
type: 'gfpgan',
...gfpganParameters,
type: facetoolType,
...facetoolParameters,
});
dispatch(
addLogEntry({
timestamp: dateFormat(new Date(), 'isoDateTime'),
message: `GFPGAN fix faces requested: ${JSON.stringify({
file: imageToProcess.url,
...gfpganParameters,
})}`,
message: `Face restoration (${facetoolType}) requested: ${JSON.stringify(
{
file: imageToProcess.url,
...facetoolParameters,
}
)}`,
})
);
},

View File

@ -151,32 +151,6 @@ const makeSocketIOListeners = (
console.error(e);
}
},
/**
* Callback to run when we receive a 'gfpganResult' event.
*/
onGFPGANResult: (data: InvokeAI.ImageResultResponse) => {
try {
const { url, metadata, mtime } = data;
dispatch(
addImage({
uuid: uuidv4(),
url,
mtime,
metadata,
})
);
dispatch(
addLogEntry({
timestamp: dateFormat(new Date(), 'isoDateTime'),
message: `Fixed faces: ${url}`,
})
);
} catch (e) {
console.error(e);
}
},
/**
* Callback to run when we receive a 'progressUpdate' event.
* TODO: Add additional progress phases

View File

@ -22,10 +22,11 @@ import * as InvokeAI from '../invokeai';
* some new action to handle whatever data was sent from the server.
*/
export const socketioMiddleware = () => {
const { hostname, port } = new URL(window.location.href);
const { origin } = new URL(window.location.href);
const socketio = io(`http://${hostname}:${port}`, {
const socketio = io(origin, {
timeout: 60000,
path: window.location.pathname + 'socket.io',
});
let areListenersSet = false;
@ -50,7 +51,7 @@ export const socketioMiddleware = () => {
const {
emitGenerateImage,
emitRunESRGAN,
emitRunGFPGAN,
emitRunFacetool,
emitDeleteImage,
emitRequestImages,
emitRequestNewImages,
@ -129,8 +130,8 @@ export const socketioMiddleware = () => {
break;
}
case 'socketio/runGFPGAN': {
emitRunGFPGAN(action.payload);
case 'socketio/runFacetool': {
emitRunFacetool(action.payload);
break;
}

View File

@ -7,12 +7,17 @@ export const PostProcessingWIP = () => {
<p>
Invoke AI offers a wide variety of post processing features. Image
Upscaling and Face Restoration are already available in the WebUI. You
can access them from the Advanced Options menu of the Text To Image tab.
A dedicated UI will be released soon.
can access them from the Advanced Options menu of the Text To Image and
Image To Image tabs. You can also process images directly, using the
image action buttons above the main image display.
</p>
<p>
A dedicated UI will be released soon to facilitate more advanced post
processing workflows.
</p>
<p>
The Invoke AI Command Line Interface offers various other features
including Embiggen, High Resolution Fixing and more.
including Embiggen.
</p>
</div>
);

View File

@ -29,6 +29,7 @@ export const frontendToBackendParameters = (
sampler,
seed,
seamless,
hiresFix,
shouldUseInitImage,
img2imgStrength,
initialImagePath,
@ -40,8 +41,10 @@ export const frontendToBackendParameters = (
shouldRunESRGAN,
upscalingLevel,
upscalingStrength,
shouldRunGFPGAN,
gfpganStrength,
shouldRunFacetool,
facetoolStrength,
codeformerFidelity,
facetoolType,
shouldRandomizeSeed,
} = optionsState;
@ -59,6 +62,7 @@ export const frontendToBackendParameters = (
sampler_name: sampler,
seed,
seamless,
hires_fix: hiresFix,
progress_images: shouldDisplayInProgress,
};
@ -86,7 +90,7 @@ export const frontendToBackendParameters = (
}
let esrganParameters: false | { [k: string]: any } = false;
let gfpganParameters: false | { [k: string]: any } = false;
let facetoolParameters: false | { [k: string]: any } = false;
if (shouldRunESRGAN) {
esrganParameters = {
@ -95,97 +99,19 @@ export const frontendToBackendParameters = (
};
}
if (shouldRunGFPGAN) {
gfpganParameters = {
strength: gfpganStrength,
if (shouldRunFacetool) {
facetoolParameters = {
type: facetoolType,
strength: facetoolStrength,
};
if (facetoolType === 'codeformer') {
facetoolParameters.codeformer_fidelity = codeformerFidelity;
}
}
return {
generationParameters,
esrganParameters,
gfpganParameters,
facetoolParameters,
};
};
export const backendToFrontendParameters = (parameters: {
[key: string]: any;
}) => {
const {
prompt,
iterations,
steps,
cfg_scale,
threshold,
perlin,
height,
width,
sampler_name,
seed,
seamless,
progress_images,
variation_amount,
with_variations,
gfpgan_strength,
upscale,
init_img,
init_mask,
strength,
} = parameters;
const options: { [key: string]: any } = {
shouldDisplayInProgress: progress_images,
// init
shouldGenerateVariations: false,
shouldRunESRGAN: false,
shouldRunGFPGAN: false,
initialImagePath: '',
maskPath: '',
};
if (variation_amount > 0) {
options.shouldGenerateVariations = true;
options.variationAmount = variation_amount;
if (with_variations) {
options.seedWeights = seedWeightsToString(with_variations);
}
}
if (gfpgan_strength > 0) {
options.shouldRunGFPGAN = true;
options.gfpganStrength = gfpgan_strength;
}
if (upscale) {
options.shouldRunESRGAN = true;
options.upscalingLevel = upscale[0];
options.upscalingStrength = upscale[1];
}
if (init_img) {
options.shouldUseInitImage = true;
options.initialImagePath = init_img;
options.strength = strength;
if (init_mask) {
options.maskPath = init_mask;
}
}
// if we had a prompt, add all the metadata, but if we don't have a prompt,
// we must have only done ESRGAN or GFPGAN so do not add that metadata
if (prompt) {
options.prompt = prompt;
options.iterations = iterations;
options.steps = steps;
options.cfgScale = cfg_scale;
options.threshold = threshold;
options.perlin = perlin;
options.height = height;
options.width = width;
options.sampler = sampler_name;
options.seed = seed;
options.seamless = seamless;
}
return options;
};

View File

@ -15,7 +15,7 @@ import {
import DeleteImageModal from './DeleteImageModal';
import { SystemState } from '../system/systemSlice';
import IAIButton from '../../common/components/IAIButton';
import { runESRGAN, runGFPGAN } from '../../app/socketio/actions';
import { runESRGAN, runFacetool } from '../../app/socketio/actions';
import IAIIconButton from '../../common/components/IAIIconButton';
import { MdDelete, MdFace, MdHd, MdImage, MdInfo } from 'react-icons/md';
import InvokePopover from './InvokePopover';
@ -66,8 +66,8 @@ const CurrentImageButtons = ({ image }: CurrentImageButtonsProps) => {
(state: RootState) => state.options.upscalingLevel
);
const gfpganStrength = useAppSelector(
(state: RootState) => state.options.gfpganStrength
const facetoolStrength = useAppSelector(
(state: RootState) => state.options.facetoolStrength
);
const { isProcessing, isConnected, isGFPGANAvailable, isESRGANAvailable } =
@ -186,7 +186,8 @@ const CurrentImageButtons = ({ image }: CurrentImageButtonsProps) => {
]
);
const handleClickFixFaces = () => dispatch(runGFPGAN(image));
const handleClickFixFaces = () => dispatch(runFacetool(image));
useHotkeys(
'r',
() => {
@ -195,7 +196,7 @@ const CurrentImageButtons = ({ image }: CurrentImageButtonsProps) => {
Boolean(!intermediateImage) &&
isConnected &&
!isProcessing &&
gfpganStrength
facetoolStrength
) {
handleClickFixFaces();
} else {
@ -213,7 +214,7 @@ const CurrentImageButtons = ({ image }: CurrentImageButtonsProps) => {
intermediateImage,
isConnected,
isProcessing,
gfpganStrength,
facetoolStrength,
]
);
@ -270,7 +271,7 @@ const CurrentImageButtons = ({ image }: CurrentImageButtonsProps) => {
!isGFPGANAvailable ||
Boolean(intermediateImage) ||
!(isConnected && !isProcessing) ||
!gfpganStrength
!facetoolStrength
}
onClick={handleClickFixFaces}
/>

View File

@ -12,6 +12,7 @@ import {
FormControl,
FormLabel,
Flex,
useToast,
} from '@chakra-ui/react';
import { createSelector } from '@reduxjs/toolkit';
import {
@ -57,6 +58,7 @@ const DeleteImageModal = forwardRef(
const dispatch = useAppDispatch();
const shouldConfirmOnDelete = useAppSelector(systemSelector);
const cancelRef = useRef<HTMLButtonElement>(null);
const toast = useToast();
const handleClickDelete = (e: SyntheticEvent) => {
e.stopPropagation();
@ -65,6 +67,12 @@ const DeleteImageModal = forwardRef(
const handleDelete = () => {
dispatch(deleteImage(image));
toast({
title: 'Image Deleted',
status: 'success',
duration: 2500,
isClosable: true,
});
onClose();
};

View File

@ -17,6 +17,12 @@
max-height: 100%;
}
.hoverable-image-delete-button {
position: absolute;
top: 0.25rem;
right: 0.25rem;
}
.hoverable-image-content {
display: flex;
position: absolute;
@ -57,3 +63,39 @@
}
}
}
.hoverable-image-context-menu {
z-index: 999;
padding: 0.4rem;
border-radius: 0.25rem;
background-color: var(--context-menu-bg-color);
box-shadow: var(--context-menu-box-shadow);
[role='menuitem'] {
font-size: 0.8rem;
line-height: 1rem;
border-radius: 3px;
display: flex;
align-items: center;
height: 1.75rem;
padding: 0 0.5rem;
position: relative;
user-select: none;
cursor: pointer;
outline: none;
&[data-disabled] {
color: grey;
pointer-events: none;
cursor: not-allowed;
}
&[data-warning] {
color: var(--status-bad-color);
}
&[data-highlighted] {
background-color: var(--context-menu-bg-color-hover);
}
}
}

View File

@ -1,17 +1,27 @@
import { Box, Icon, IconButton, Image, Tooltip } from '@chakra-ui/react';
import {
Box,
Icon,
IconButton,
Image,
Tooltip,
useToast,
} from '@chakra-ui/react';
import { RootState, useAppDispatch, useAppSelector } from '../../app/store';
import { setCurrentImage } from './gallerySlice';
import { FaCheck, FaImage, FaSeedling, FaTrashAlt } from 'react-icons/fa';
import { FaCheck, FaTrashAlt } from 'react-icons/fa';
import DeleteImageModal from './DeleteImageModal';
import { memo, SyntheticEvent, useState } from 'react';
import { memo, useState } from 'react';
import {
setActiveTab,
setAllParameters,
setAllImageToImageParameters,
setAllTextToImageParameters,
setInitialImagePath,
setPrompt,
setSeed,
} from '../options/optionsSlice';
import * as InvokeAI from '../../app/invokeai';
import { IoArrowUndoCircleOutline } from 'react-icons/io5';
import * as ContextMenu from '@radix-ui/react-context-menu';
import { tabMap } from '../tabs/InvokeTabs';
interface HoverableImageProps {
image: InvokeAI.Image;
@ -27,115 +37,174 @@ const memoEqualityCheck = (
* Gallery image component with delete/use all/use seed buttons on hover.
*/
const HoverableImage = memo((props: HoverableImageProps) => {
const [isHovered, setIsHovered] = useState<boolean>(false);
const dispatch = useAppDispatch();
const activeTab = useAppSelector(
(state: RootState) => state.options.activeTab
);
const [isHovered, setIsHovered] = useState<boolean>(false);
const toast = useToast();
const { image, isSelected } = props;
const { url, uuid, metadata } = image;
const handleMouseOver = () => setIsHovered(true);
const handleMouseOut = () => setIsHovered(false);
const handleClickSetAllParameters = (e: SyntheticEvent) => {
e.stopPropagation();
dispatch(setAllParameters(metadata));
const handleUsePrompt = () => {
dispatch(setPrompt(image.metadata.image.prompt));
toast({
title: 'Prompt Set',
status: 'success',
duration: 2500,
isClosable: true,
});
};
const handleClickSetSeed = (e: SyntheticEvent) => {
e.stopPropagation();
const handleUseSeed = () => {
dispatch(setSeed(image.metadata.image.seed));
toast({
title: 'Seed Set',
status: 'success',
duration: 2500,
isClosable: true,
});
};
const handleSetInitImage = (e: SyntheticEvent) => {
e.stopPropagation();
const handleSendToImageToImage = () => {
dispatch(setInitialImagePath(image.url));
if (activeTab !== 1) {
dispatch(setActiveTab(1));
}
toast({
title: 'Sent to Image To Image',
status: 'success',
duration: 2500,
isClosable: true,
});
};
const handleClickImage = () => dispatch(setCurrentImage(image));
const handleUseAllParameters = () => {
dispatch(setAllTextToImageParameters(metadata));
toast({
title: 'Parameters Set',
status: 'success',
duration: 2500,
isClosable: true,
});
};
const handleUseInitialImage = async () => {
// check if the image exists before setting it as initial image
if (metadata?.image?.init_image_path) {
const response = await fetch(metadata.image.init_image_path);
if (response.ok) {
dispatch(setActiveTab(tabMap.indexOf('img2img')));
dispatch(setAllImageToImageParameters(metadata));
toast({
title: 'Initial Image Set',
status: 'success',
duration: 2500,
isClosable: true,
});
return;
}
}
toast({
title: 'Initial Image Not Set',
description: 'Could not load initial image.',
status: 'error',
duration: 2500,
isClosable: true,
});
};
const handleSelectImage = () => dispatch(setCurrentImage(image));
return (
<Box
position={'relative'}
key={uuid}
className="hoverable-image"
onMouseOver={handleMouseOver}
onMouseOut={handleMouseOut}
>
<Image
objectFit="cover"
rounded={'md'}
src={url}
loading={'lazy'}
className="hoverable-image-image"
/>
<div className="hoverable-image-content" onClick={handleClickImage}>
{isSelected && (
<Icon
width={'50%'}
height={'50%'}
as={FaCheck}
className="hoverable-image-check"
<ContextMenu.Root>
<ContextMenu.Trigger>
<Box
position={'relative'}
key={uuid}
className="hoverable-image"
onMouseOver={handleMouseOver}
onMouseOut={handleMouseOut}
>
<Image
className="hoverable-image-image"
objectFit="cover"
rounded={'md'}
src={url}
loading={'lazy'}
/>
)}
</div>
{isHovered && (
<div className="hoverable-image-icons">
<Tooltip label={'Delete image'} hasArrow>
<DeleteImageModal image={image}>
<IconButton
colorScheme="red"
aria-label="Delete image"
icon={<FaTrashAlt />}
size="xs"
variant={'imageHoverIconButton'}
fontSize={14}
<div className="hoverable-image-content" onClick={handleSelectImage}>
{isSelected && (
<Icon
width={'50%'}
height={'50%'}
as={FaCheck}
className="hoverable-image-check"
/>
</DeleteImageModal>
</Tooltip>
{['txt2img', 'img2img'].includes(image?.metadata?.image?.type) && (
<Tooltip label="Use All Parameters" hasArrow>
<IconButton
aria-label="Use All Parameters"
icon={<IoArrowUndoCircleOutline />}
size="xs"
fontSize={18}
variant={'imageHoverIconButton'}
onClickCapture={handleClickSetAllParameters}
/>
</Tooltip>
)}
</div>
{isHovered && (
<div className="hoverable-image-delete-button">
<Tooltip label={'Delete image'} hasArrow>
<DeleteImageModal image={image}>
<IconButton
aria-label="Delete image"
icon={<FaTrashAlt />}
size="xs"
variant={'imageHoverIconButton'}
fontSize={14}
/>
</DeleteImageModal>
</Tooltip>
</div>
)}
{image?.metadata?.image?.seed !== undefined && (
<Tooltip label="Use Seed" hasArrow>
<IconButton
aria-label="Use Seed"
icon={<FaSeedling />}
size="xs"
fontSize={16}
variant={'imageHoverIconButton'}
onClickCapture={handleClickSetSeed}
/>
</Tooltip>
)}
<Tooltip label="Send To Image To Image" hasArrow>
<IconButton
aria-label="Send To Image To Image"
icon={<FaImage />}
size="xs"
fontSize={16}
variant={'imageHoverIconButton'}
onClickCapture={handleSetInitImage}
/>
</Tooltip>
</div>
)}
</Box>
</Box>
</ContextMenu.Trigger>
<ContextMenu.Content className="hoverable-image-context-menu">
<ContextMenu.Item
onClickCapture={handleUsePrompt}
disabled={image?.metadata?.image?.prompt === undefined}
>
Use Prompt
</ContextMenu.Item>
<ContextMenu.Item
onClickCapture={handleUseSeed}
disabled={image?.metadata?.image?.seed === undefined}
>
Use Seed
</ContextMenu.Item>
<ContextMenu.Item
onClickCapture={handleUseAllParameters}
disabled={
!['txt2img', 'img2img'].includes(image?.metadata?.image?.type)
}
>
Use All Parameters
</ContextMenu.Item>
<Tooltip label="Load initial image used for this generation">
<ContextMenu.Item
onClickCapture={handleUseInitialImage}
disabled={image?.metadata?.image?.type !== 'img2img'}
>
Use Initial Image
</ContextMenu.Item>
</Tooltip>
<ContextMenu.Item onClickCapture={handleSendToImageToImage}>
Send to Image To Image
</ContextMenu.Item>
<DeleteImageModal image={image}>
<ContextMenu.Item data-warning>Delete Image</ContextMenu.Item>
</DeleteImageModal>
</ContextMenu.Content>
</ContextMenu.Root>
);
}, memoEqualityCheck);

View File

@ -55,31 +55,37 @@
@include HideScrollbar;
}
.masonry-grid {
display: -webkit-box; /* Not needed if autoprefixing */
display: -ms-flexbox; /* Not needed if autoprefixing */
display: flex;
margin-left: 0.5rem; /* gutter size offset */
width: auto;
}
.masonry-grid_column {
padding-left: 0.5rem; /* gutter size */
background-clip: padding-box;
}
// from https://css-tricks.com/a-grid-of-logos-in-squares/
.image-gallery {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(80px, auto));
grid-gap: 0.5rem;
.hoverable-image {
padding: 0.5rem;
position: relative;
&::before {
// for aspect ratio
content: '';
display: block;
padding-bottom: 100%;
}
.hoverable-image-image {
position: absolute;
max-width: 100%;
/* Style your items */
.masonry-grid_column > .hoverable-image {
/* change div to reference your elements you put in <Masonry> */
background: var(--tab-color);
margin-bottom: 0.5rem;
}
// Alternate Version
// top: 0;
// bottom: 0;
// right: 0;
// left: 0;
// margin: auto;
// .image-gallery {
// display: flex;
// grid-template-columns: repeat(auto-fill, minmax(80px, auto));
// gap: 0.5rem;
// justify-items: center;
// }
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
}
}
}
.image-gallery-load-more-btn {
background-color: var(--btn-load-more) !important;

View File

@ -1,10 +1,9 @@
import { Button, IconButton } from '@chakra-ui/button';
import { Resizable } from 're-resizable';
import React, { useState } from 'react';
import React from 'react';
import { useHotkeys } from 'react-hotkeys-hook';
import { MdClear, MdPhotoLibrary } from 'react-icons/md';
import Masonry from 'react-masonry-css';
import { requestImages } from '../../app/socketio/actions';
import { RootState, useAppDispatch, useAppSelector } from '../../app/store';
import IAIIconButton from '../../common/components/IAIIconButton';
@ -27,12 +26,6 @@ export default function ImageGallery() {
const dispatch = useAppDispatch();
const [column, setColumn] = useState<number | undefined>();
const handleResize = (event: MouseEvent | TouchEvent | any) => {
setColumn(Math.floor((window.innerWidth - event.x) / 120));
};
const handleShowGalleryToggle = () => {
dispatch(setShouldShowGallery(!shouldShowGallery));
};
@ -89,9 +82,7 @@ export default function ImageGallery() {
minWidth={'300'}
maxWidth={activeTab == 1 ? '300' : '600'}
className="image-gallery-popup"
onResize={handleResize}
>
{/* <div className="image-gallery-popup"></div> */}
<div className="image-gallery-header">
<h1>Your Invocations</h1>
<IconButton
@ -104,12 +95,7 @@ export default function ImageGallery() {
</div>
<div className="image-gallery-container">
{images.length ? (
<Masonry
className="masonry-grid"
columnClassName="masonry-grid_column"
breakpointCols={column}
>
{/* <div className="image-gallery"> */}
<div className="image-gallery">
{images.map((image) => {
const { uuid } = image;
const isSelected = currentImageUuid === uuid;
@ -121,8 +107,7 @@ export default function ImageGallery() {
/>
);
})}
{/* </div> */}
</Masonry>
</div>
) : (
<div className="image-gallery-container-placeholder">
<MdPhotoLibrary />

View File

@ -14,13 +14,17 @@ import { useAppDispatch } from '../../../app/store';
import * as InvokeAI from '../../../app/invokeai';
import {
setCfgScale,
setGfpganStrength,
setFacetoolStrength,
setCodeformerFidelity,
setFacetoolType,
setHeight,
setHiresFix,
setImg2imgStrength,
setInitialImagePath,
setMaskPath,
setPrompt,
setSampler,
setSeamless,
setSeed,
setSeedWeights,
setShouldFitToWidthHeight,
@ -116,6 +120,7 @@ const ImageMetadataViewer = memo(
steps,
cfg_scale,
seamless,
hires_fix,
width,
height,
strength,
@ -148,7 +153,7 @@ const ImageMetadataViewer = memo(
<MetadataItem
label="Fix faces strength"
value={strength}
onClick={() => dispatch(setGfpganStrength(strength))}
onClick={() => dispatch(setFacetoolStrength(strength))}
/>
)}
{type === 'esrgan' && scale !== undefined && (
@ -214,7 +219,14 @@ const ImageMetadataViewer = memo(
<MetadataItem
label="Seamless"
value={seamless}
onClick={() => dispatch(setWidth(seamless))}
onClick={() => dispatch(setSeamless(seamless))}
/>
)}
{hires_fix && (
<MetadataItem
label="High Resolution Optimization"
value={hires_fix}
onClick={() => dispatch(setHiresFix(hires_fix))}
/>
)}
{width && (
@ -311,12 +323,46 @@ const ImageMetadataViewer = memo(
<MetadataItem
label="Strength"
value={strength}
onClick={() =>
dispatch(setGfpganStrength(strength))
}
onClick={() => {
dispatch(setFacetoolStrength(strength));
dispatch(setFacetoolType('gfpgan'));
}}
/>
</Flex>
);
} else if (postprocess.type === 'codeformer') {
const { strength, fidelity } = postprocess;
return (
<Flex
key={i}
pl={'2rem'}
gap={1}
direction={'column'}
>
<Text size={'md'}>{`${
i + 1
}: Face restoration (Codeformer)`}</Text>
<MetadataItem
label="Strength"
value={strength}
onClick={() => {
dispatch(setFacetoolStrength(strength));
dispatch(setFacetoolType('codeformer'));
}}
/>
{fidelity && (
<MetadataItem
label="Fidelity"
value={fidelity}
onClick={() => {
dispatch(setCodeformerFidelity(fidelity));
dispatch(setFacetoolType('codeformer'));
}}
/>
)}
</Flex>
);
}
}
)}

View File

@ -72,7 +72,13 @@ export const gallerySlice = createSlice({
},
addImage: (state, action: PayloadAction<InvokeAI.Image>) => {
const newImage = action.payload;
const { uuid, mtime } = newImage;
const { uuid, url, mtime } = newImage;
// Do not add duplicate images
if (state.images.find((i) => i.url === url && i.mtime === mtime)) {
return;
}
state.images.unshift(newImage);
state.currentImageUuid = uuid;
state.intermediateImage = undefined;
@ -120,8 +126,15 @@ export const gallerySlice = createSlice({
) => {
const { images, areMoreImagesAvailable } = action.payload;
if (images.length > 0) {
// Filter images that already exist in the gallery
const newImages = images.filter(
(newImage) =>
!state.images.find(
(i) => i.url === newImage.url && i.mtime === newImage.mtime
)
);
state.images = state.images
.concat(images)
.concat(newImages)
.sort((a, b) => b.mtime - a.mtime);
if (!state.currentImage) {

View File

@ -6,21 +6,21 @@ import {
useAppSelector,
} from '../../../../app/store';
import IAISwitch from '../../../../common/components/IAISwitch';
import { setShouldRunGFPGAN } from '../../optionsSlice';
import { setShouldRunFacetool } from '../../optionsSlice';
export default function FaceRestore() {
const isGFPGANAvailable = useAppSelector(
(state: RootState) => state.system.isGFPGANAvailable
);
const shouldRunGFPGAN = useAppSelector(
(state: RootState) => state.options.shouldRunGFPGAN
const shouldRunFacetool = useAppSelector(
(state: RootState) => state.options.shouldRunFacetool
);
const dispatch = useAppDispatch();
const handleChangeShouldRunGFPGAN = (e: ChangeEvent<HTMLInputElement>) =>
dispatch(setShouldRunGFPGAN(e.target.checked));
const handleChangeShouldRunFacetool = (e: ChangeEvent<HTMLInputElement>) =>
dispatch(setShouldRunFacetool(e.target.checked));
return (
<Flex
@ -32,8 +32,8 @@ export default function FaceRestore() {
<p>Restore Face</p>
<IAISwitch
isDisabled={!isGFPGANAvailable}
isChecked={shouldRunGFPGAN}
onChange={handleChangeShouldRunGFPGAN}
isChecked={shouldRunFacetool}
onChange={handleChangeShouldRunFacetool}
/>
</Flex>
);

View File

@ -3,18 +3,29 @@ import { Flex } from '@chakra-ui/react';
import { RootState } from '../../../../app/store';
import { useAppDispatch, useAppSelector } from '../../../../app/store';
import { OptionsState, setGfpganStrength } from '../../optionsSlice';
import {
FacetoolType,
OptionsState,
setCodeformerFidelity,
setFacetoolStrength,
setFacetoolType,
} from '../../optionsSlice';
import { createSelector } from '@reduxjs/toolkit';
import { isEqual } from 'lodash';
import { SystemState } from '../../../system/systemSlice';
import IAINumberInput from '../../../../common/components/IAINumberInput';
import IAISelect from '../../../../common/components/IAISelect';
import { FACETOOL_TYPES } from '../../../../app/constants';
import { ChangeEvent } from 'react';
const optionsSelector = createSelector(
(state: RootState) => state.options,
(options: OptionsState) => {
return {
gfpganStrength: options.gfpganStrength,
facetoolStrength: options.facetoolStrength,
facetoolType: options.facetoolType,
codeformerFidelity: options.codeformerFidelity,
};
},
{
@ -43,13 +54,26 @@ const systemSelector = createSelector(
*/
const FaceRestoreOptions = () => {
const dispatch = useAppDispatch();
const { gfpganStrength } = useAppSelector(optionsSelector);
const { facetoolStrength, facetoolType, codeformerFidelity } =
useAppSelector(optionsSelector);
const { isGFPGANAvailable } = useAppSelector(systemSelector);
const handleChangeStrength = (v: number) => dispatch(setGfpganStrength(v));
const handleChangeStrength = (v: number) => dispatch(setFacetoolStrength(v));
const handleChangeCodeformerFidelity = (v: number) =>
dispatch(setCodeformerFidelity(v));
const handleChangeFacetoolType = (e: ChangeEvent<HTMLSelectElement>) =>
dispatch(setFacetoolType(e.target.value as FacetoolType));
return (
<Flex direction={'column'} gap={2}>
<IAISelect
label="Type"
validValues={FACETOOL_TYPES.concat()}
value={facetoolType}
onChange={handleChangeFacetoolType}
/>
<IAINumberInput
isDisabled={!isGFPGANAvailable}
label="Strength"
@ -57,10 +81,23 @@ const FaceRestoreOptions = () => {
min={0}
max={1}
onChange={handleChangeStrength}
value={gfpganStrength}
value={facetoolStrength}
width="90px"
isInteger={false}
/>
{facetoolType === 'codeformer' && (
<IAINumberInput
isDisabled={!isGFPGANAvailable}
label="Fidelity"
step={0.05}
min={0}
max={1}
onChange={handleChangeCodeformerFidelity}
value={codeformerFidelity}
width="90px"
isInteger={false}
/>
)}
</Flex>
);
};

View File

@ -0,0 +1,32 @@
import { Flex } from '@chakra-ui/react';
import { RootState } from '../../app/store';
import { useAppDispatch, useAppSelector } from '../../app/store';
import { setHiresFix } from './optionsSlice';
import { ChangeEvent } from 'react';
import IAISwitch from '../../common/components/IAISwitch';
/**
 * High Resolution Optimization (hires fix) toggle.
*/
const HiresOptions = () => {
const dispatch = useAppDispatch();
const hiresFix = useAppSelector((state: RootState) => state.options.hiresFix);
const handleChangeHiresFix = (e: ChangeEvent<HTMLInputElement>) =>
dispatch(setHiresFix(e.target.checked));
return (
<Flex gap={2} direction={'column'}>
<IAISwitch
label="High Res Optimization"
fontSize={'md'}
isChecked={hiresFix}
onChange={handleChangeHiresFix}
/>
</Flex>
);
};
export default HiresOptions;

View File

@ -1,29 +1,14 @@
import { Flex } from '@chakra-ui/react';
import { RootState } from '../../app/store';
import { useAppDispatch, useAppSelector } from '../../app/store';
import { setSeamless } from './optionsSlice';
import { ChangeEvent } from 'react';
import IAISwitch from '../../common/components/IAISwitch';
/**
* Image output options. Includes width, height, seamless tiling.
*/
import HiresOptions from './HiresOptions';
import SeamlessOptions from './SeamlessOptions';
const OutputOptions = () => {
const dispatch = useAppDispatch();
const seamless = useAppSelector((state: RootState) => state.options.seamless);
const handleChangeSeamless = (e: ChangeEvent<HTMLInputElement>) =>
dispatch(setSeamless(e.target.checked));
return (
<Flex gap={2} direction={'column'}>
<IAISwitch
label="Seamless tiling"
fontSize={'md'}
isChecked={seamless}
onChange={handleChangeSeamless}
/>
<SeamlessOptions />
<HiresOptions />
</Flex>
);
};

View File

@ -68,7 +68,6 @@ const PromptInput = () => {
<div className="prompt-bar">
<FormControl
isInvalid={prompt.length === 0 || Boolean(prompt.match(/^[\s\r\n]+$/))}
isDisabled={isProcessing}
>
<Textarea
id="prompt"

View File

@ -0,0 +1,28 @@
import { Flex } from '@chakra-ui/react';
import { RootState } from '../../app/store';
import { useAppDispatch, useAppSelector } from '../../app/store';
import { setSeamless } from './optionsSlice';
import { ChangeEvent } from 'react';
import IAISwitch from '../../common/components/IAISwitch';
const SeamlessOptions = () => {
const dispatch = useAppDispatch();
const seamless = useAppSelector((state: RootState) => state.options.seamless);
const handleChangeSeamless = (e: ChangeEvent<HTMLInputElement>) =>
dispatch(setSeamless(e.target.checked));
return (
<Flex gap={2} direction={'column'}>
<IAISwitch
label="Seamless tiling"
fontSize={'md'}
isChecked={seamless}
onChange={handleChangeSeamless}
/>
</Flex>
);
};
export default SeamlessOptions;

View File

@ -3,9 +3,12 @@ import type { PayloadAction } from '@reduxjs/toolkit';
import * as InvokeAI from '../../app/invokeai';
import promptToString from '../../common/util/promptToString';
import { seedWeightsToString } from '../../common/util/seedWeightPairs';
import { FACETOOL_TYPES } from '../../app/constants';
export type UpscalingLevel = 2 | 4;
export type FacetoolType = typeof FACETOOL_TYPES[number];
export interface OptionsState {
prompt: string;
iterations: number;
@ -18,19 +21,22 @@ export interface OptionsState {
perlin: number;
seed: number;
img2imgStrength: number;
gfpganStrength: number;
facetoolType: FacetoolType;
facetoolStrength: number;
codeformerFidelity: number;
upscalingLevel: UpscalingLevel;
upscalingStrength: number;
shouldUseInitImage: boolean;
initialImagePath: string | null;
maskPath: string;
seamless: boolean;
hiresFix: boolean;
shouldFitToWidthHeight: boolean;
shouldGenerateVariations: boolean;
variationAmount: number;
seedWeights: string;
shouldRunESRGAN: boolean;
shouldRunGFPGAN: boolean;
shouldRunFacetool: boolean;
shouldRandomizeSeed: boolean;
showAdvancedOptions: boolean;
activeTab: number;
@ -50,6 +56,7 @@ const initialOptionsState: OptionsState = {
perlin: 0,
seed: 0,
seamless: false,
hiresFix: false,
shouldUseInitImage: false,
img2imgStrength: 0.75,
initialImagePath: null,
@ -61,8 +68,10 @@ const initialOptionsState: OptionsState = {
shouldRunESRGAN: false,
upscalingLevel: 4,
upscalingStrength: 0.75,
shouldRunGFPGAN: false,
gfpganStrength: 0.8,
shouldRunFacetool: false,
facetoolStrength: 0.8,
facetoolType: 'gfpgan',
codeformerFidelity: 0.75,
shouldRandomizeSeed: true,
showAdvancedOptions: true,
activeTab: 0,
@ -115,8 +124,11 @@ export const optionsSlice = createSlice({
setImg2imgStrength: (state, action: PayloadAction<number>) => {
state.img2imgStrength = action.payload;
},
setGfpganStrength: (state, action: PayloadAction<number>) => {
state.gfpganStrength = action.payload;
setFacetoolStrength: (state, action: PayloadAction<number>) => {
state.facetoolStrength = action.payload;
},
setCodeformerFidelity: (state, action: PayloadAction<number>) => {
state.codeformerFidelity = action.payload;
},
setUpscalingLevel: (state, action: PayloadAction<UpscalingLevel>) => {
state.upscalingLevel = action.payload;
@ -138,6 +150,9 @@ export const optionsSlice = createSlice({
setSeamless: (state, action: PayloadAction<boolean>) => {
state.seamless = action.payload;
},
setHiresFix: (state, action: PayloadAction<boolean>) => {
state.hiresFix = action.payload;
},
setShouldFitToWidthHeight: (state, action: PayloadAction<boolean>) => {
state.shouldFitToWidthHeight = action.payload;
},
@ -168,6 +183,67 @@ export const optionsSlice = createSlice({
setSeedWeights: (state, action: PayloadAction<string>) => {
state.seedWeights = action.payload;
},
setAllTextToImageParameters: (
state,
action: PayloadAction<InvokeAI.Metadata>
) => {
const {
sampler,
prompt,
seed,
variations,
steps,
cfg_scale,
threshold,
perlin,
seamless,
hires_fix,
width,
height,
} = action.payload.image;
if (variations && variations.length > 0) {
state.seedWeights = seedWeightsToString(variations);
state.shouldGenerateVariations = true;
} else {
state.shouldGenerateVariations = false;
}
if (seed) {
state.seed = seed;
state.shouldRandomizeSeed = false;
}
if (prompt) state.prompt = promptToString(prompt);
if (sampler) state.sampler = sampler;
if (steps) state.steps = steps;
if (cfg_scale) state.cfgScale = cfg_scale;
if (threshold) state.threshold = threshold;
if (typeof threshold === 'undefined') state.threshold = 0;
if (perlin) state.perlin = perlin;
if (typeof perlin === 'undefined') state.perlin = 0;
if (typeof seamless === 'boolean') state.seamless = seamless;
if (typeof hires_fix === 'boolean') state.hiresFix = hires_fix;
if (width) state.width = width;
if (height) state.height = height;
},
setAllImageToImageParameters: (
state,
action: PayloadAction<InvokeAI.Metadata>
) => {
const { type, strength, fit, init_image_path, mask_image_path } =
action.payload.image;
if (type === 'img2img') {
if (init_image_path) state.initialImagePath = init_image_path;
if (mask_image_path) state.maskPath = mask_image_path;
if (strength) state.img2imgStrength = strength;
if (typeof fit === 'boolean') state.shouldFitToWidthHeight = fit;
state.shouldUseInitImage = true;
} else {
state.shouldUseInitImage = false;
}
},
setAllParameters: (state, action: PayloadAction<InvokeAI.Metadata>) => {
const {
type,
@ -180,6 +256,7 @@ export const optionsSlice = createSlice({
threshold,
perlin,
seamless,
hires_fix,
width,
height,
strength,
@ -210,43 +287,6 @@ export const optionsSlice = createSlice({
state.shouldRandomizeSeed = false;
}
/**
* We support arbitrary numbers of postprocessing steps, so it
* doesn't make sense to include postprocessing metadata when
* we use all parameters. Because this code needed a bit of braining
* to figure out, I am leaving it, in case it is needed again.
*/
// let postprocessingNotDone = ['gfpgan', 'esrgan'];
// if (postprocessing && postprocessing.length > 0) {
// postprocessing.forEach(
// (postprocess: InvokeAI.PostProcessedImageMetadata) => {
// if (postprocess.type === 'gfpgan') {
// const { strength } = postprocess;
// if (strength) state.gfpganStrength = strength;
// state.shouldRunGFPGAN = true;
// postprocessingNotDone = postprocessingNotDone.filter(
// (p) => p !== 'gfpgan'
// );
// }
// if (postprocess.type === 'esrgan') {
// const { scale, strength } = postprocess;
// if (scale) state.upscalingLevel = scale;
// if (strength) state.upscalingStrength = strength;
// state.shouldRunESRGAN = true;
// postprocessingNotDone = postprocessingNotDone.filter(
// (p) => p !== 'esrgan'
// );
// }
// }
// );
// }
// postprocessingNotDone.forEach((p) => {
// if (p === 'esrgan') state.shouldRunESRGAN = false;
// if (p === 'gfpgan') state.shouldRunGFPGAN = false;
// });
if (prompt) state.prompt = promptToString(prompt);
if (sampler) state.sampler = sampler;
if (steps) state.steps = steps;
@ -254,8 +294,9 @@ export const optionsSlice = createSlice({
if (threshold) state.threshold = threshold;
if (typeof threshold === 'undefined') state.threshold = 0;
if (perlin) state.perlin = perlin;
if (typeof perlin === 'undefined') state.perlin = 0;
if (typeof perlin === 'undefined') state.perlin = 0;
if (typeof seamless === 'boolean') state.seamless = seamless;
if (typeof hires_fix === 'boolean') state.hiresFix = hires_fix;
if (width) state.width = width;
if (height) state.height = height;
},
@ -265,8 +306,11 @@ export const optionsSlice = createSlice({
...initialOptionsState,
};
},
setShouldRunGFPGAN: (state, action: PayloadAction<boolean>) => {
state.shouldRunGFPGAN = action.payload;
setShouldRunFacetool: (state, action: PayloadAction<boolean>) => {
state.shouldRunFacetool = action.payload;
},
setFacetoolType: (state, action: PayloadAction<FacetoolType>) => {
state.facetoolType = action.payload;
},
setShouldRunESRGAN: (state, action: PayloadAction<boolean>) => {
state.shouldRunESRGAN = action.payload;
@ -301,8 +345,11 @@ export const {
setSampler,
setSeed,
setSeamless,
setHiresFix,
setImg2imgStrength,
setGfpganStrength,
setFacetoolStrength,
setFacetoolType,
setCodeformerFidelity,
setUpscalingLevel,
setUpscalingStrength,
setShouldUseInitImage,
@ -316,13 +363,15 @@ export const {
setSeedWeights,
setVariationAmount,
setAllParameters,
setShouldRunGFPGAN,
setShouldRunFacetool,
setShouldRunESRGAN,
setShouldRandomizeSeed,
setShowAdvancedOptions,
setActiveTab,
setShouldShowImageDetails,
setShouldShowGallery,
setAllTextToImageParameters,
setAllImageToImageParameters,
} = optionsSlice.actions;
export default optionsSlice.reducer;

View File

@ -1,4 +1,4 @@
import { IconButton, Image } from '@chakra-ui/react';
import { IconButton, Image, useToast } from '@chakra-ui/react';
import React, { SyntheticEvent } from 'react';
import { MdClear } from 'react-icons/md';
import { RootState, useAppDispatch, useAppSelector } from '../../../app/store';
@ -11,10 +11,23 @@ export default function InitImagePreview() {
const dispatch = useAppDispatch();
const toast = useToast();
const handleClickResetInitialImage = (e: SyntheticEvent) => {
e.stopPropagation();
dispatch(setInitialImagePath(null));
};
const alertMissingInitImage = () => {
toast({
title: 'Problem loading parameters',
description: 'Unable to load init image.',
status: 'error',
isClosable: true,
});
dispatch(setInitialImagePath(null));
};
return (
<div className="init-image-preview">
<div className="init-image-preview-header">
@ -29,7 +42,12 @@ export default function InitImagePreview() {
</div>
{initialImagePath && (
<div className="init-image-image">
<Image fit={'contain'} src={initialImagePath} rounded={'md'} />
<Image
fit={'contain'}
src={initialImagePath}
rounded={'md'}
onError={alertMissingInitImage}
/>
</div>
)}
</div>

View File

@ -50,8 +50,13 @@ export const tab_dict = {
},
};
// Array where index maps to the key of tab_dict
export const tabMap = _.map(tab_dict, (tab, key) => key);
// Use tabMap to generate a union type of tab names
const tabMapTypes = [...tabMap] as const;
export type InvokeTabName = typeof tabMapTypes[number];
export default function InvokeTabs() {
const activeTab = useAppSelector(
(state: RootState) => state.options.activeTab

View File

@ -95,4 +95,9 @@
// Gallery
--gallery-resizeable-color: rgb(36, 38, 48);
// Context Menus
--context-menu-bg-color: rgb(46, 48, 58);
--context-menu-box-shadow: none;
--context-menu-bg-color-hover: rgb(30, 32, 42);
}

View File

@ -94,4 +94,11 @@
// Gallery
--gallery-resizeable-color: rgb(192, 194, 196);
// Context Menus
--context-menu-bg-color: var(--background-color);
--context-menu-box-shadow: 0px 10px 38px -10px rgba(22, 23, 24, 0.35),
0px 10px 20px -15px rgba(22, 23, 24, 0.2);
--context-menu-bg-color-hover: var(--background-color-secondary);
}

View File

@ -5,6 +5,7 @@ import eslint from 'vite-plugin-eslint';
// https://vitejs.dev/config/
export default defineConfig(({ mode }) => {
const common = {
base: '',
plugins: [react(), eslint()],
server: {
// Proxy HTTP requests to the flask server

View File

@ -213,6 +213,13 @@
dependencies:
regenerator-runtime "^0.13.4"
"@babel/runtime@^7.13.10":
version "7.19.4"
resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.19.4.tgz#a42f814502ee467d55b38dd1c256f53a7b885c78"
integrity sha512-EXpLCrk55f+cYqmHsSR+yD/0gAIMxxA9QK9lnQWzhMCvt+YmoBN7Zx94s++Kv0+unHk39vxNO8t+CMA2WSS3wA==
dependencies:
regenerator-runtime "^0.13.4"
"@babel/template@^7.18.10":
version "7.18.10"
resolved "https://registry.yarnpkg.com/@babel/template/-/template-7.18.10.tgz#6f9134835970d1dbf0835c0d100c9f38de0c5e71"
@ -1122,6 +1129,26 @@
minimatch "^3.1.2"
strip-json-comments "^3.1.1"
"@floating-ui/core@^0.7.3":
version "0.7.3"
resolved "https://registry.yarnpkg.com/@floating-ui/core/-/core-0.7.3.tgz#d274116678ffae87f6b60e90f88cc4083eefab86"
integrity sha512-buc8BXHmG9l82+OQXOFU3Kr2XQx9ys01U/Q9HMIrZ300iLc8HLMgh7dcCqgYzAzf4BkoQvDcXf5Y+CuEZ5JBYg==
"@floating-ui/dom@^0.5.3":
version "0.5.4"
resolved "https://registry.yarnpkg.com/@floating-ui/dom/-/dom-0.5.4.tgz#4eae73f78bcd4bd553ae2ade30e6f1f9c73fe3f1"
integrity sha512-419BMceRLq0RrmTSDxn8hf9R3VCJv2K9PUfugh5JyEFmdjzDo+e8U5EdR8nzKq8Yj1htzLm3b6eQEEam3/rrtg==
dependencies:
"@floating-ui/core" "^0.7.3"
"@floating-ui/react-dom@0.7.2":
version "0.7.2"
resolved "https://registry.yarnpkg.com/@floating-ui/react-dom/-/react-dom-0.7.2.tgz#0bf4ceccb777a140fc535c87eb5d6241c8e89864"
integrity sha512-1T0sJcpHgX/u4I1OzIEhlcrvkUN8ln39nz7fMoE/2HDHrPiMFoOGR7++GYyfUmIQHkkrTinaeQsO3XWubjSvGg==
dependencies:
"@floating-ui/dom" "^0.5.3"
use-isomorphic-layout-effect "^1.1.1"
"@humanwhocodes/config-array@^0.10.4":
version "0.10.4"
resolved "https://registry.yarnpkg.com/@humanwhocodes/config-array/-/config-array-0.10.4.tgz#01e7366e57d2ad104feea63e72248f22015c520c"
@ -1265,6 +1292,246 @@
resolved "https://registry.yarnpkg.com/@popperjs/core/-/core-2.11.6.tgz#cee20bd55e68a1720bdab363ecf0c821ded4cd45"
integrity sha512-50/17A98tWUfQ176raKiOGXuYpLyyVMkxxG6oylzL3BPOlA6ADGdK7EYunSa4I064xerltq9TGXs8HmOk5E+vw==
"@radix-ui/primitive@1.0.0":
version "1.0.0"
resolved "https://registry.yarnpkg.com/@radix-ui/primitive/-/primitive-1.0.0.tgz#e1d8ef30b10ea10e69c76e896f608d9276352253"
integrity sha512-3e7rn8FDMin4CgeL7Z/49smCA3rFYY3Ha2rUQ7HRWFadS5iCRw08ZgVT1LaNTCNqgvrUiyczLflrVrF0SRQtNA==
dependencies:
"@babel/runtime" "^7.13.10"
"@radix-ui/react-arrow@1.0.1":
version "1.0.1"
resolved "https://registry.yarnpkg.com/@radix-ui/react-arrow/-/react-arrow-1.0.1.tgz#5246adf79e97f89e819af68da51ddcf349ecf1c4"
integrity sha512-1yientwXqXcErDHEv8av9ZVNEBldH8L9scVR3is20lL+jOCfcJyMFZFEY5cgIrgexsq1qggSXqiEL/d/4f+QXA==
dependencies:
"@babel/runtime" "^7.13.10"
"@radix-ui/react-primitive" "1.0.1"
"@radix-ui/react-collection@1.0.1":
version "1.0.1"
resolved "https://registry.yarnpkg.com/@radix-ui/react-collection/-/react-collection-1.0.1.tgz#259506f97c6703b36291826768d3c1337edd1de5"
integrity sha512-uuiFbs+YCKjn3X1DTSx9G7BHApu4GHbi3kgiwsnFUbOKCrwejAJv4eE4Vc8C0Oaxt9T0aV4ox0WCOdx+39Xo+g==
dependencies:
"@babel/runtime" "^7.13.10"
"@radix-ui/react-compose-refs" "1.0.0"
"@radix-ui/react-context" "1.0.0"
"@radix-ui/react-primitive" "1.0.1"
"@radix-ui/react-slot" "1.0.1"
"@radix-ui/react-compose-refs@1.0.0":
version "1.0.0"
resolved "https://registry.yarnpkg.com/@radix-ui/react-compose-refs/-/react-compose-refs-1.0.0.tgz#37595b1f16ec7f228d698590e78eeed18ff218ae"
integrity sha512-0KaSv6sx787/hK3eF53iOkiSLwAGlFMx5lotrqD2pTjB18KbybKoEIgkNZTKC60YECDQTKGTRcDBILwZVqVKvA==
dependencies:
"@babel/runtime" "^7.13.10"
"@radix-ui/react-context-menu@^2.0.1":
version "2.0.1"
resolved "https://registry.yarnpkg.com/@radix-ui/react-context-menu/-/react-context-menu-2.0.1.tgz#aee7c81bac9983b3748284bf3925dd63796c90b4"
integrity sha512-7DuhU4xDcUk3AMJUlb5tHHOvJZ1GF4+snDIpjtWGlTvO0VktNKgbvBuGLlirdkYoUSI0mJXwOUcUXQapgIyefw==
dependencies:
"@babel/runtime" "^7.13.10"
"@radix-ui/primitive" "1.0.0"
"@radix-ui/react-context" "1.0.0"
"@radix-ui/react-menu" "2.0.1"
"@radix-ui/react-primitive" "1.0.1"
"@radix-ui/react-use-callback-ref" "1.0.0"
"@radix-ui/react-use-controllable-state" "1.0.0"
"@radix-ui/react-context@1.0.0":
version "1.0.0"
resolved "https://registry.yarnpkg.com/@radix-ui/react-context/-/react-context-1.0.0.tgz#f38e30c5859a9fb5e9aa9a9da452ee3ed9e0aee0"
integrity sha512-1pVM9RfOQ+n/N5PJK33kRSKsr1glNxomxONs5c49MliinBY6Yw2Q995qfBUUo0/Mbg05B/sGA0gkgPI7kmSHBg==
dependencies:
"@babel/runtime" "^7.13.10"
"@radix-ui/react-direction@1.0.0":
version "1.0.0"
resolved "https://registry.yarnpkg.com/@radix-ui/react-direction/-/react-direction-1.0.0.tgz#a2e0b552352459ecf96342c79949dd833c1e6e45"
integrity sha512-2HV05lGUgYcA6xgLQ4BKPDmtL+QbIZYH5fCOTAOOcJ5O0QbWS3i9lKaurLzliYUDhORI2Qr3pyjhJh44lKA3rQ==
dependencies:
"@babel/runtime" "^7.13.10"
"@radix-ui/react-dismissable-layer@1.0.2":
version "1.0.2"
resolved "https://registry.yarnpkg.com/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.0.2.tgz#f04d1061bddf00b1ca304148516b9ddc62e45fb2"
integrity sha512-WjJzMrTWROozDqLB0uRWYvj4UuXsM/2L19EmQ3Au+IJWqwvwq9Bwd+P8ivo0Deg9JDPArR1I6MbWNi1CmXsskg==
dependencies:
"@babel/runtime" "^7.13.10"
"@radix-ui/primitive" "1.0.0"
"@radix-ui/react-compose-refs" "1.0.0"
"@radix-ui/react-primitive" "1.0.1"
"@radix-ui/react-use-callback-ref" "1.0.0"
"@radix-ui/react-use-escape-keydown" "1.0.2"
"@radix-ui/react-focus-guards@1.0.0":
version "1.0.0"
resolved "https://registry.yarnpkg.com/@radix-ui/react-focus-guards/-/react-focus-guards-1.0.0.tgz#339c1c69c41628c1a5e655f15f7020bf11aa01fa"
integrity sha512-UagjDk4ijOAnGu4WMUPj9ahi7/zJJqNZ9ZAiGPp7waUWJO0O1aWXi/udPphI0IUjvrhBsZJGSN66dR2dsueLWQ==
dependencies:
"@babel/runtime" "^7.13.10"
"@radix-ui/react-focus-scope@1.0.1":
version "1.0.1"
resolved "https://registry.yarnpkg.com/@radix-ui/react-focus-scope/-/react-focus-scope-1.0.1.tgz#faea8c25f537c5a5c38c50914b63722db0e7f951"
integrity sha512-Ej2MQTit8IWJiS2uuujGUmxXjF/y5xZptIIQnyd2JHLwtV0R2j9NRVoRj/1j/gJ7e3REdaBw4Hjf4a1ImhkZcQ==
dependencies:
"@babel/runtime" "^7.13.10"
"@radix-ui/react-compose-refs" "1.0.0"
"@radix-ui/react-primitive" "1.0.1"
"@radix-ui/react-use-callback-ref" "1.0.0"
"@radix-ui/react-id@1.0.0":
version "1.0.0"
resolved "https://registry.yarnpkg.com/@radix-ui/react-id/-/react-id-1.0.0.tgz#8d43224910741870a45a8c9d092f25887bb6d11e"
integrity sha512-Q6iAB/U7Tq3NTolBBQbHTgclPmGWE3OlktGGqrClPozSw4vkQ1DfQAOtzgRPecKsMdJINE05iaoDUG8tRzCBjw==
dependencies:
"@babel/runtime" "^7.13.10"
"@radix-ui/react-use-layout-effect" "1.0.0"
"@radix-ui/react-menu@2.0.1":
version "2.0.1"
resolved "https://registry.yarnpkg.com/@radix-ui/react-menu/-/react-menu-2.0.1.tgz#44ebfd45d8482db678b935c0b9d1102d683372d8"
integrity sha512-I5FFZQxCl2fHoJ7R0m5/oWA9EX8/ttH4AbgneoCH7DAXQioFeb0XMAYnOVSp1GgJZ1Nx/mohxNQSeTMcaF1YPw==
dependencies:
"@babel/runtime" "^7.13.10"
"@radix-ui/primitive" "1.0.0"
"@radix-ui/react-collection" "1.0.1"
"@radix-ui/react-compose-refs" "1.0.0"
"@radix-ui/react-context" "1.0.0"
"@radix-ui/react-direction" "1.0.0"
"@radix-ui/react-dismissable-layer" "1.0.2"
"@radix-ui/react-focus-guards" "1.0.0"
"@radix-ui/react-focus-scope" "1.0.1"
"@radix-ui/react-id" "1.0.0"
"@radix-ui/react-popper" "1.0.1"
"@radix-ui/react-portal" "1.0.1"
"@radix-ui/react-presence" "1.0.0"
"@radix-ui/react-primitive" "1.0.1"
"@radix-ui/react-roving-focus" "1.0.1"
"@radix-ui/react-slot" "1.0.1"
"@radix-ui/react-use-callback-ref" "1.0.0"
aria-hidden "^1.1.1"
react-remove-scroll "2.5.5"
"@radix-ui/react-popper@1.0.1":
version "1.0.1"
resolved "https://registry.yarnpkg.com/@radix-ui/react-popper/-/react-popper-1.0.1.tgz#9fa8a6a493404afa225866a5cd75af23d141baa0"
integrity sha512-J4Vj7k3k+EHNWgcKrE+BLlQfpewxA7Zd76h5I0bIa+/EqaIZ3DuwrbPj49O3wqN+STnXsBuxiHLiF0iU3yfovw==
dependencies:
"@babel/runtime" "^7.13.10"
"@floating-ui/react-dom" "0.7.2"
"@radix-ui/react-arrow" "1.0.1"
"@radix-ui/react-compose-refs" "1.0.0"
"@radix-ui/react-context" "1.0.0"
"@radix-ui/react-primitive" "1.0.1"
"@radix-ui/react-use-layout-effect" "1.0.0"
"@radix-ui/react-use-rect" "1.0.0"
"@radix-ui/react-use-size" "1.0.0"
"@radix-ui/rect" "1.0.0"
"@radix-ui/react-portal@1.0.1":
version "1.0.1"
resolved "https://registry.yarnpkg.com/@radix-ui/react-portal/-/react-portal-1.0.1.tgz#169c5a50719c2bb0079cf4c91a27aa6d37e5dd33"
integrity sha512-NY2vUWI5WENgAT1nfC6JS7RU5xRYBfjZVLq0HmgEN1Ezy3rk/UruMV4+Rd0F40PEaFC5SrLS1ixYvcYIQrb4Ig==
dependencies:
"@babel/runtime" "^7.13.10"
"@radix-ui/react-primitive" "1.0.1"
"@radix-ui/react-presence@1.0.0":
version "1.0.0"
resolved "https://registry.yarnpkg.com/@radix-ui/react-presence/-/react-presence-1.0.0.tgz#814fe46df11f9a468808a6010e3f3ca7e0b2e84a"
integrity sha512-A+6XEvN01NfVWiKu38ybawfHsBjWum42MRPnEuqPsBZ4eV7e/7K321B5VgYMPv3Xx5An6o1/l9ZuDBgmcmWK3w==
dependencies:
"@babel/runtime" "^7.13.10"
"@radix-ui/react-compose-refs" "1.0.0"
"@radix-ui/react-use-layout-effect" "1.0.0"
"@radix-ui/react-primitive@1.0.1":
version "1.0.1"
resolved "https://registry.yarnpkg.com/@radix-ui/react-primitive/-/react-primitive-1.0.1.tgz#c1ebcce283dd2f02e4fbefdaa49d1cb13dbc990a"
integrity sha512-fHbmislWVkZaIdeF6GZxF0A/NH/3BjrGIYj+Ae6eTmTCr7EB0RQAAVEiqsXK6p3/JcRqVSBQoceZroj30Jj3XA==
dependencies:
"@babel/runtime" "^7.13.10"
"@radix-ui/react-slot" "1.0.1"
"@radix-ui/react-roving-focus@1.0.1":
version "1.0.1"
resolved "https://registry.yarnpkg.com/@radix-ui/react-roving-focus/-/react-roving-focus-1.0.1.tgz#475621f63aee43faa183a5270f35d49e530de3d7"
integrity sha512-TB76u5TIxKpqMpUAuYH2VqMhHYKa+4Vs1NHygo/llLvlffN6mLVsFhz0AnSFlSBAvTBYVHYAkHAyEt7x1gPJOA==
dependencies:
"@babel/runtime" "^7.13.10"
"@radix-ui/primitive" "1.0.0"
"@radix-ui/react-collection" "1.0.1"
"@radix-ui/react-compose-refs" "1.0.0"
"@radix-ui/react-context" "1.0.0"
"@radix-ui/react-direction" "1.0.0"
"@radix-ui/react-id" "1.0.0"
"@radix-ui/react-primitive" "1.0.1"
"@radix-ui/react-use-callback-ref" "1.0.0"
"@radix-ui/react-use-controllable-state" "1.0.0"
"@radix-ui/react-slot@1.0.1":
version "1.0.1"
resolved "https://registry.yarnpkg.com/@radix-ui/react-slot/-/react-slot-1.0.1.tgz#e7868c669c974d649070e9ecbec0b367ee0b4d81"
integrity sha512-avutXAFL1ehGvAXtPquu0YK5oz6ctS474iM3vNGQIkswrVhdrS52e3uoMQBzZhNRAIE0jBnUyXWNmSjGHhCFcw==
dependencies:
"@babel/runtime" "^7.13.10"
"@radix-ui/react-compose-refs" "1.0.0"
"@radix-ui/react-use-callback-ref@1.0.0":
version "1.0.0"
resolved "https://registry.yarnpkg.com/@radix-ui/react-use-callback-ref/-/react-use-callback-ref-1.0.0.tgz#9e7b8b6b4946fe3cbe8f748c82a2cce54e7b6a90"
integrity sha512-GZtyzoHz95Rhs6S63D2t/eqvdFCm7I+yHMLVQheKM7nBD8mbZIt+ct1jz4536MDnaOGKIxynJ8eHTkVGVVkoTg==
dependencies:
"@babel/runtime" "^7.13.10"
"@radix-ui/react-use-controllable-state@1.0.0":
version "1.0.0"
resolved "https://registry.yarnpkg.com/@radix-ui/react-use-controllable-state/-/react-use-controllable-state-1.0.0.tgz#a64deaafbbc52d5d407afaa22d493d687c538b7f"
integrity sha512-FohDoZvk3mEXh9AWAVyRTYR4Sq7/gavuofglmiXB2g1aKyboUD4YtgWxKj8O5n+Uak52gXQ4wKz5IFST4vtJHg==
dependencies:
"@babel/runtime" "^7.13.10"
"@radix-ui/react-use-callback-ref" "1.0.0"
"@radix-ui/react-use-escape-keydown@1.0.2":
version "1.0.2"
resolved "https://registry.yarnpkg.com/@radix-ui/react-use-escape-keydown/-/react-use-escape-keydown-1.0.2.tgz#09ab6455ab240b4f0a61faf06d4e5132c4d639f6"
integrity sha512-DXGim3x74WgUv+iMNCF+cAo8xUHHeqvjx8zs7trKf+FkQKPQXLk2sX7Gx1ysH7Q76xCpZuxIJE7HLPxRE+Q+GA==
dependencies:
"@babel/runtime" "^7.13.10"
"@radix-ui/react-use-callback-ref" "1.0.0"
"@radix-ui/react-use-layout-effect@1.0.0":
version "1.0.0"
resolved "https://registry.yarnpkg.com/@radix-ui/react-use-layout-effect/-/react-use-layout-effect-1.0.0.tgz#2fc19e97223a81de64cd3ba1dc42ceffd82374dc"
integrity sha512-6Tpkq+R6LOlmQb1R5NNETLG0B4YP0wc+klfXafpUCj6JGyaUc8il7/kUZ7m59rGbXGczE9Bs+iz2qloqsZBduQ==
dependencies:
"@babel/runtime" "^7.13.10"
"@radix-ui/react-use-rect@1.0.0":
version "1.0.0"
resolved "https://registry.yarnpkg.com/@radix-ui/react-use-rect/-/react-use-rect-1.0.0.tgz#b040cc88a4906b78696cd3a32b075ed5b1423b3e"
integrity sha512-TB7pID8NRMEHxb/qQJpvSt3hQU4sqNPM1VCTjTRjEOa7cEop/QMuq8S6fb/5Tsz64kqSvB9WnwsDHtjnrM9qew==
dependencies:
"@babel/runtime" "^7.13.10"
"@radix-ui/rect" "1.0.0"
"@radix-ui/react-use-size@1.0.0":
version "1.0.0"
resolved "https://registry.yarnpkg.com/@radix-ui/react-use-size/-/react-use-size-1.0.0.tgz#a0b455ac826749419f6354dc733e2ca465054771"
integrity sha512-imZ3aYcoYCKhhgNpkNDh/aTiU05qw9hX+HHI1QDBTyIlcFjgeFlKKySNGMwTp7nYFLQg/j0VA2FmCY4WPDDHMg==
dependencies:
"@babel/runtime" "^7.13.10"
"@radix-ui/react-use-layout-effect" "1.0.0"
"@radix-ui/rect@1.0.0":
version "1.0.0"
resolved "https://registry.yarnpkg.com/@radix-ui/rect/-/rect-1.0.0.tgz#0dc8e6a829ea2828d53cbc94b81793ba6383bf3c"
integrity sha512-d0O68AYy/9oeEy1DdC07bz1/ZXX+DqCskRd3i4JzLSTXwefzaepQrKjXC7aNM8lTHjFLDO0pDgaEiQ7jEk+HVg==
dependencies:
"@babel/runtime" "^7.13.10"
"@reduxjs/toolkit@^1.8.5":
version "1.8.5"
resolved "https://registry.yarnpkg.com/@reduxjs/toolkit/-/toolkit-1.8.5.tgz#c14bece03ee08be88467f22dc0ecf9cf875527cd"
@ -2850,11 +3117,6 @@ react-is@^18.0.0:
resolved "https://registry.yarnpkg.com/react-is/-/react-is-18.2.0.tgz#199431eeaaa2e09f86427efbb4f1473edb47609b"
integrity sha512-xWGDIW6x921xtzPkhiULtthJHoJvBbF3q26fzloPCK0hsvxtPVelvftw3zjbHWSkR2km9Z+4uxbDDK/6Zw9B8w==
react-masonry-css@^1.0.16:
version "1.0.16"
resolved "https://registry.yarnpkg.com/react-masonry-css/-/react-masonry-css-1.0.16.tgz#72b28b4ae3484e250534700860597553a10f1a2c"
integrity sha512-KSW0hR2VQmltt/qAa3eXOctQDyOu7+ZBevtKgpNDSzT7k5LA/0XntNa9z9HKCdz3QlxmJHglTZ18e4sX4V8zZQ==
react-redux@^8.0.2:
version "8.0.2"
resolved "https://registry.yarnpkg.com/react-redux/-/react-redux-8.0.2.tgz#bc2a304bb21e79c6808e3e47c50fe1caf62f7aad"
@ -2880,7 +3142,7 @@ react-remove-scroll-bar@^2.3.3:
react-style-singleton "^2.2.1"
tslib "^2.0.0"
react-remove-scroll@^2.5.4:
react-remove-scroll@2.5.5, react-remove-scroll@^2.5.4:
version "2.5.5"
resolved "https://registry.yarnpkg.com/react-remove-scroll/-/react-remove-scroll-2.5.5.tgz#1e31a1260df08887a8a0e46d09271b52b3a37e77"
integrity sha512-ImKhrzJJsyXJfBZ4bzu8Bwpka14c/fQt0k+cyFp/PBhTfyDnU5hjOtM4AG/0AMyy8oKzOTR0lDgJIM7pYXI0kw==
@ -3255,6 +3517,11 @@ use-callback-ref@^1.3.0:
dependencies:
tslib "^2.0.0"
use-isomorphic-layout-effect@^1.1.1:
version "1.1.2"
resolved "https://registry.yarnpkg.com/use-isomorphic-layout-effect/-/use-isomorphic-layout-effect-1.1.2.tgz#497cefb13d863d687b08477d9e5a164ad8c1a6fb"
integrity sha512-49L8yCO3iGT/ZF9QttjwLF/ZD9Iwto5LnH5LmEdk/6cFmXddqi2ulF0edxTwjj+7mqvpVVGQWvbXZdn32wRSHA==
use-sidecar@^1.1.2:
version "1.1.2"
resolved "https://registry.yarnpkg.com/use-sidecar/-/use-sidecar-1.1.2.tgz#2f43126ba2d7d7e117aa5855e5d8f0276dfe73c2"

View File

@ -33,7 +33,10 @@ from ldm.invoke.args import metadata_from_png
from ldm.invoke.image_util import InitImageResizer
from ldm.invoke.devices import choose_torch_device, choose_precision
from ldm.invoke.conditioning import get_uc_and_c
from ldm.invoke.model_cache import ModelCache
from ldm.invoke.seamless import configure_model_padding
from ldm.invoke.txt2mask import Txt2Mask, SegmentedGrayscale
def fix_func(orig):
if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
def new_func(*args, **kw):
@ -52,6 +55,9 @@ torch.randint_like = fix_func(torch.randint_like)
torch.bernoulli = fix_func(torch.bernoulli)
torch.multinomial = fix_func(torch.multinomial)
# this is the fallback model in case no default is defined
FALLBACK_MODEL_NAME='stable-diffusion-1.4'
"""Simplified text to image API for stable diffusion/latent diffusion
Example Usage:
@ -104,12 +110,13 @@ still work.
The full list of arguments to Generate() are:
gr = Generate(
# these values are set once and shouldn't be changed
conf = path to configuration file ('configs/models.yaml')
model = symbolic name of the model in the configuration file
precision = float precision to be used
conf:str = path to configuration file ('configs/models.yaml')
model:str = symbolic name of the model in the configuration file
precision:str = floating-point precision to be used
safety_checker:bool = activate safety checker [False]
# this value is sticky and maintained between generation calls
sampler_name = ['ddim', 'k_dpm_2_a', 'k_dpm_2', 'k_euler_a', 'k_euler', 'k_heun', 'k_lms', 'plms'] // k_lms
sampler_name:str = ['ddim', 'k_dpm_2_a', 'k_dpm_2', 'k_euler_a', 'k_euler', 'k_heun', 'k_lms', 'plms'] // k_lms
# these are deprecated - use conf and model instead
weights = path to model weights ('models/ldm/stable-diffusion-v1/model.ckpt')
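A rough usage sketch, assuming the 'stable-diffusion-1.4' stanza exists in configs/models.yaml (the prompt and output path below are placeholders):

    from ldm.generate import Generate

    gr = Generate(
        model          = 'stable-diffusion-1.4',
        conf           = 'configs/models.yaml',
        sampler_name   = 'k_lms',
        safety_checker = False,
    )
    for image, seed in gr.prompt2image(prompt='a sunlit forest clearing',
                                       steps=50, cfg_scale=7.5):
        image.save(f'outputs/forest.{seed}.png')   # placeholder output location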
@ -126,27 +133,26 @@ class Generate:
def __init__(
self,
model = 'stable-diffusion-1.4',
conf = 'configs/models.yaml',
embedding_path = None,
sampler_name = 'k_lms',
ddim_eta = 0.0, # deterministic
full_precision = False,
precision = 'auto',
# these are deprecated; if present they override values in the conf file
weights = None,
config = None,
model = None,
conf = 'configs/models.yaml',
embedding_path = None,
sampler_name = 'k_lms',
ddim_eta = 0.0, # deterministic
full_precision = False,
precision = 'auto',
gfpgan=None,
codeformer=None,
esrgan=None,
free_gpu_mem=False,
safety_checker:bool=False,
# these are deprecated; if present they override values in the conf file
weights = None,
config = None,
):
models = OmegaConf.load(conf)
mconfig = models[model]
self.weights = mconfig.weights if weights is None else weights
self.config = mconfig.config if config is None else config
self.height = mconfig.height
self.width = mconfig.width
mconfig = OmegaConf.load(conf)
self.height = None
self.width = None
self.model_cache = None
self.iterations = 1
self.steps = 50
self.cfg_scale = 7.5
@ -155,8 +161,11 @@ class Generate:
self.precision = precision
self.strength = 0.75
self.seamless = False
self.seamless_axes = {'x','y'}
self.hires_fix = False
self.embedding_path = embedding_path
self.model = None # empty for now
self.model_hash = None
self.sampler = None
self.device = None
self.session_peakmem = None
@ -167,11 +176,15 @@ class Generate:
self.codeformer = codeformer
self.esrgan = esrgan
self.free_gpu_mem = free_gpu_mem
self.size_matters = True # used to warn once about large image sizes and VRAM
self.txt2mask = None
self.safety_checker = None
# Note that in previous versions, there was an option to pass the
# device to Generate(). However the device was then ignored, so
# it wasn't actually doing anything. This logic could be reinstated.
device_type = choose_torch_device()
print(f'>> Using device_type {device_type}')
self.device = torch.device(device_type)
if full_precision:
if self.precision != 'auto':
@ -182,6 +195,10 @@ class Generate:
if self.precision == 'auto':
self.precision = choose_precision(self.device)
# model caching system for fast switching
self.model_cache = ModelCache(mconfig,self.device,self.precision)
self.model_name = model or self.model_cache.default_model() or FALLBACK_MODEL_NAME
# for VRAM usage statistics
self.session_peakmem = torch.cuda.max_memory_allocated() if self._has_cuda else None
transformers.logging.set_verbosity_error()
@ -189,6 +206,19 @@ class Generate:
# gets rid of annoying messages about random seed
logging.getLogger('pytorch_lightning').setLevel(logging.ERROR)
# load safety checker if requested
if safety_checker:
try:
print('>> Initializing safety checker')
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from transformers import AutoFeatureExtractor
safety_model_id = "CompVis/stable-diffusion-safety-checker"
self.safety_checker = StableDiffusionSafetyChecker.from_pretrained(safety_model_id, local_files_only=True)
self.safety_feature_extractor = AutoFeatureExtractor.from_pretrained(safety_model_id, local_files_only=True)
except Exception:
print('** An error was encountered while loading the safety checker:')
print(traceback.format_exc())
def prompt2png(self, prompt, outdir, **kwargs):
"""
Takes a prompt and an output directory, writes out the requested number
@ -233,6 +263,7 @@ class Generate:
height = None,
sampler_name = None,
seamless = False,
seamless_axes = {'x','y'},
log_tokenization = False,
with_variations = None,
variation_amount = 0.0,
@ -241,6 +272,7 @@ class Generate:
# these are specific to img2img and inpaint
init_img = None,
init_mask = None,
text_mask = None,
fit = False,
strength = None,
init_color = None,
@ -249,13 +281,18 @@ class Generate:
embiggen_tiles = None,
# these are specific to GFPGAN/ESRGAN
facetool = None,
gfpgan_strength = 0,
facetool_strength = 0,
codeformer_fidelity = None,
save_original = False,
upscale = None,
# this is specific to inpainting and causes more extreme inpainting
inpaint_replace = 0.0,
# This will help match inpainted areas to the original image more smoothly
mask_blur_radius: int = 8,
# Set this True to handle KeyboardInterrupt internally
catch_interrupts = False,
hires_fix = False,
use_mps_noise = False,
**args,
): # eat up additional cruft
"""
@ -269,9 +306,12 @@ class Generate:
height // height of image, in multiples of 64 (512)
cfg_scale // how strongly the prompt influences the image (7.5) (must be >1)
seamless // whether the generated image should tile
hires_fix // whether the Hires Fix should be applied during generation
init_img // path to an initial image
init_mask // path to a mask for the initial image
text_mask // a text string that will be used to guide clipseg generation of the init_mask
strength // strength for noising/unnoising init_img. 0.0 preserves image exactly, 1.0 replaces it completely
gfpgan_strength // strength for GFPGAN. 0.0 preserves image exactly, 1.0 replaces it completely
facetool_strength // strength for GFPGAN/CodeFormer. 0.0 preserves image exactly, 1.0 replaces it completely
ddim_eta // image randomness (eta=0.0 means the same seed always produces the same image)
step_callback // a function or method that will be called each step
image_callback // a function or method that will be called each time an image is generated
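A further sketch exercising the img2img arguments above, assuming gr is the Generate instance from the earlier sketch and the file path is a placeholder:

    results = gr.prompt2image(
        prompt            = 'a blue ceramic mug on a wooden table',
        init_img          = 'inputs/table.png',   # placeholder starting image
        text_mask         = ['a mug', 0.6],       # clipseg prompt plus confidence threshold
        strength          = 0.75,                 # noising strength applied to the init image
        facetool_strength = 0.0,                  # no face restoration in this sketch
    )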
@ -302,6 +342,8 @@ class Generate:
width = width or self.width
height = height or self.height
seamless = seamless or self.seamless
seamless_axes = seamless_axes or self.seamless_axes
hires_fix = hires_fix or self.hires_fix
cfg_scale = cfg_scale or self.cfg_scale
ddim_eta = ddim_eta or self.ddim_eta
iterations = iterations or self.iterations
@ -312,11 +354,14 @@ class Generate:
with_variations = [] if with_variations is None else with_variations
# will instantiate the model or return it from cache
model = self.load_model()
for m in model.modules():
if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
m.padding_mode = 'circular' if seamless else m._orig_padding_mode
model = self.set_model(self.model_name)
# self.width and self.height are set by set_model()
# to the width and height of the images the model was trained on
width = width or self.width
height = height or self.height
configure_model_padding(model, seamless, seamless_axes)
assert cfg_scale > 1.0, 'CFG_Scale (-C) must be >1.0'
assert threshold >= 0.0, '--threshold must be >=0.0'
@ -344,6 +389,7 @@ class Generate:
f'variation weights must be in [0.0, 1.0]: got {[weight for _, weight in with_variations]}'
width, height, _ = self._resolution_check(width, height, log=True)
assert inpaint_replace >=0.0 and inpaint_replace <= 1.0,'inpaint_replace must be between 0.0 and 1.0'
if sampler_name and (sampler_name != self.sampler_name):
self.sampler_name = sampler_name
@ -364,13 +410,16 @@ class Generate:
log_tokens =self.log_tokenization
)
init_image,mask_image = self._make_images(
init_image, mask_image = self._make_images(
init_img,
init_mask,
width,
height,
fit=fit,
text_mask=text_mask,
)
# TODO: Hacky selection of operation to perform. Needs to be refactored.
if (init_image is not None) and (mask_image is not None):
generator = self._make_inpaint()
elif (embiggen != None or embiggen_tiles != None):
@ -385,6 +434,13 @@ class Generate:
generator.set_variation(
self.seed, variation_amount, with_variations
)
generator.use_mps_noise = use_mps_noise
checker = {
'checker':self.safety_checker,
'extractor':self.safety_feature_extractor
} if self.safety_checker else None
results = generator.generate(
prompt,
iterations=iterations,
@ -395,10 +451,10 @@ class Generate:
conditioning=(uc, c),
ddim_eta=ddim_eta,
image_callback=image_callback, # called after the final image is generated
step_callback=step_callback, # called after each intermediate image is generated
step_callback=step_callback, # called after each intermediate image is generated
width=width,
height=height,
init_img=init_img, # embiggen needs to manipulate from the unmodified init_img
init_img=init_img, # embiggen needs to manipulate from the unmodified init_img
init_image=init_image, # notice that init_image is different from init_img
mask_image=mask_image,
strength=strength,
@ -406,6 +462,9 @@ class Generate:
perlin=perlin,
embiggen=embiggen,
embiggen_tiles=embiggen_tiles,
inpaint_replace=inpaint_replace,
mask_blur_radius=mask_blur_radius,
safety_checker=checker
)
if init_color:
@ -413,11 +472,11 @@ class Generate:
reference_image_path = init_color,
image_callback = image_callback)
if upscale is not None or gfpgan_strength > 0:
if upscale is not None or facetool_strength > 0:
self.upscale_and_reconstruct(results,
upscale = upscale,
facetool = facetool,
strength = gfpgan_strength,
strength = facetool_strength,
codeformer_fidelity = codeformer_fidelity,
save_original = save_original,
image_callback = image_callback)
@ -460,7 +519,7 @@ class Generate:
self,
image_path,
tool = 'gfpgan', # one of 'upscale', 'gfpgan', 'codeformer', 'outpaint', or 'embiggen'
gfpgan_strength = 0.0,
facetool_strength = 0.0,
codeformer_fidelity = 0.75,
upscale = None,
out_direction = None,
@ -507,11 +566,11 @@ class Generate:
facetool = 'codeformer'
elif tool == 'upscale':
facetool = 'gfpgan' # but won't be run
gfpgan_strength = 0
facetool_strength = 0
return self.upscale_and_reconstruct(
[[image,seed]],
facetool = facetool,
strength = gfpgan_strength,
strength = facetool_strength,
codeformer_fidelity = codeformer_fidelity,
save_original = save_original,
upscale = upscale,
@ -523,16 +582,19 @@ class Generate:
from ldm.invoke.restoration.outcrop import Outcrop
extend_instructions = {}
for direction,pixels in _pairwise(opt.outcrop):
extend_instructions[direction]=int(pixels)
restorer = Outcrop(image,self,)
return restorer.process (
extend_instructions,
opt = opt,
orig_opt = args,
image_callback = callback,
prefix = prefix,
)
try:
extend_instructions[direction]=int(pixels)
except ValueError:
print(f'** invalid extension instruction. Use <directions> <pixels>..., as in "top 64 left 128 right 64 bottom 64"')
if len(extend_instructions)>0:
restorer = Outcrop(image,self,)
return restorer.process (
extend_instructions,
opt = opt,
orig_opt = args,
image_callback = callback,
prefix = prefix,
)
elif tool == 'embiggen':
# fetch the metadata from the image
@ -581,17 +643,14 @@ class Generate:
width,
height,
fit=False,
text_mask=None,
):
init_image = None
init_mask = None
if not img:
return None, None
image = self._load_img(
img,
width,
height,
)
image = self._load_img(img)
if image.width < self.width and image.height < self.height:
print(f'>> WARNING: img2img and inpainting may produce unexpected results with initial images smaller than {self.width}x{self.height} in both dimensions')
@ -599,20 +658,22 @@ class Generate:
# if image has a transparent area and no mask was provided, then try to generate mask
if self._has_transparency(image):
self._transparency_check_and_warning(image, mask)
# this returns a torch tensor
init_mask = self._create_init_mask(image, width, height, fit=fit)
if (image.width * image.height) > (self.width * self.height):
if (image.width * image.height) > (self.width * self.height) and self.size_matters:
print(">> This input is larger than your defaults. If you run out of memory, please use a smaller image.")
self.size_matters = False
init_image = self._create_init_image(image,width,height,fit=fit) # this returns a torch tensor
init_image = self._create_init_image(image,width,height,fit=fit)
if mask:
mask_image = self._load_img(
mask, width, height) # this returns an Image
mask_image = self._load_img(mask)
init_mask = self._create_init_mask(mask_image,width,height,fit=fit)
return init_image, init_mask
elif text_mask:
init_mask = self._txt2mask(image, text_mask, width, height, fit=fit)
return init_image,init_mask
def _make_base(self):
if not self.generators.get('base'):
@ -653,29 +714,39 @@ class Generate:
return self.generators['inpaint']
def load_model(self):
"""Load and initialize the model from configuration variables passed at object creation time"""
if self.model is None:
seed_everything(random.randrange(0, np.iinfo(np.uint32).max))
try:
model = self._load_model_from_config(self.config, self.weights)
if self.embedding_path is not None:
model.embedding_manager.load(
self.embedding_path, self.precision == 'float32' or self.precision == 'autocast'
)
self.model = model.to(self.device)
# model.to doesn't change the cond_stage_model.device used to move the tokenizer output, so set it here
self.model.cond_stage_model.device = self.device
except AttributeError as e:
print(f'>> Error loading model. {str(e)}', file=sys.stderr)
print(traceback.format_exc(), file=sys.stderr)
raise SystemExit from e
'''
preload model identified in self.model_name
'''
self.set_model(self.model_name)
self._set_sampler()
def set_model(self,model_name):
"""
Given the name of a model defined in models.yaml, will load and initialize it
and return the model object. Previously-used models will be cached.
"""
if self.model_name == model_name and self.model is not None:
return self.model
for m in self.model.modules():
if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
m._orig_padding_mode = m.padding_mode
model_data = self.model_cache.get_model(model_name)
if model_data is None or len(model_data) == 0:
return None
self.model = model_data['model']
self.width = model_data['width']
self.height= model_data['height']
self.model_hash = model_data['hash']
# uncache generators so they pick up new models
self.generators = {}
seed_everything(random.randrange(0, np.iinfo(np.uint32).max))
if self.embedding_path is not None:
self.model.embedding_manager.load(
self.embedding_path, self.precision == 'float32' or self.precision == 'autocast'
)
self._set_sampler()
self.model_name = model_name
return self.model
def correct_colors(self,
@ -747,6 +818,23 @@ class Generate:
else:
r[0] = image
def apply_textmask(self, image_path:str, prompt:str, callback, threshold:float=0.5):
assert os.path.exists(image_path), f'** "{image_path}" not found. Please enter the name of an existing image file to mask **'
basename,_ = os.path.splitext(os.path.basename(image_path))
if self.txt2mask is None:
self.txt2mask = Txt2Mask(device = self.device)
segmented = self.txt2mask.segment(image_path,prompt)
trans = segmented.to_transparent()
inverse = segmented.to_transparent(invert=True)
mask = segmented.to_mask(threshold)
path_filter = re.compile(r'[<>:"/\\|?*]')
safe_prompt = path_filter.sub('_', prompt)[:50].rstrip(' .')
callback(trans,f'{safe_prompt}.deselected',use_prefix=basename)
callback(inverse,f'{safe_prompt}.selected',use_prefix=basename)
callback(mask,f'{safe_prompt}.masked',use_prefix=basename)
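# Usage sketch for apply_textmask(), assuming a Generate instance `gr`, a simple
# writer callback, and a placeholder input path (the callback signature mirrors
# the three callback(...) calls above):
#
#     def save_layer(img, name, use_prefix=None):
#         prefix = f'{use_prefix}.' if use_prefix else ''
#         img.save(f'outputs/{prefix}{name}.png', 'PNG')
#
#     gr.apply_textmask('inputs/table.png', 'a mug', save_layer, threshold=0.5)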
# to help WebGUI - front end to generator util function
def sample_to_image(self, samples):
return self._make_base().sample_to_image(samples)
@ -779,54 +867,7 @@ class Generate:
print(msg)
# Be warned: config is the path to the model config file, not the invoke conf file!
# Also note that we can get config and weights from self, so why do we need to
# pass them as args?
def _load_model_from_config(self, config, weights):
print(f'>> Loading model from {weights}')
# for usage statistics
device_type = choose_torch_device()
if device_type == 'cuda':
torch.cuda.reset_peak_memory_stats()
tic = time.time()
# this does the work
c = OmegaConf.load(config)
with open(weights,'rb') as f:
weight_bytes = f.read()
self.model_hash = self._cached_sha256(weights,weight_bytes)
pl_sd = torch.load(io.BytesIO(weight_bytes), map_location='cpu')
del weight_bytes
sd = pl_sd['state_dict']
model = instantiate_from_config(c.model)
m, u = model.load_state_dict(sd, strict=False)
if self.precision == 'float16':
print('>> Using faster float16 precision')
model.to(torch.float16)
else:
print('>> Using more accurate float32 precision')
model.to(self.device)
model.eval()
# usage statistics
toc = time.time()
print(
f'>> Model loaded in', '%4.2fs' % (toc - tic)
)
if self._has_cuda():
print(
'>> Max VRAM used to load the model:',
'%4.2fG' % (torch.cuda.max_memory_allocated() / 1e9),
'\n>> Current VRAM usage:'
'%4.2fG' % (torch.cuda.memory_allocated() / 1e9),
)
return model
def _load_img(self, img, width, height)->Image:
def _load_img(self, img)->Image:
if isinstance(img, Image.Image):
image = img
print(
@ -849,45 +890,43 @@ class Generate:
def _create_init_image(self, image, width, height, fit=True):
image = image.convert('RGB')
if fit:
image = self._fit_image(image, (width, height))
else:
image = self._squeeze_image(image)
image = np.array(image).astype(np.float32) / 255.0
image = image[None].transpose(0, 3, 1, 2)
image = torch.from_numpy(image)
image = 2.0 * image - 1.0
return image.to(self.device)
image = self._fit_image(image, (width, height)) if fit else self._squeeze_image(image)
return image
def _create_init_mask(self, image, width, height, fit=True):
# convert into a black/white mask
image = self._image_to_mask(image)
image = image.convert('RGB')
# now we adjust the size
if fit:
image = self._fit_image(image, (width, height))
else:
image = self._squeeze_image(image)
image = image.resize((image.width//downsampling, image.height //
downsampling), resample=Image.Resampling.NEAREST)
image = np.array(image)
image = image.astype(np.float32) / 255.0
image = image[None].transpose(0, 3, 1, 2)
image = torch.from_numpy(image)
return image.to(self.device)
image = self._fit_image(image, (width, height)) if fit else self._squeeze_image(image)
return image
# The mask is expected to have the region to be inpainted
# with alpha transparency. It converts it into a black/white
# image with the transparent part black.
def _image_to_mask(self, mask_image, invert=False) -> Image:
def _image_to_mask(self, mask_image: Image.Image, invert=False) -> Image:
# Obtain the mask from the transparency channel
mask = Image.new(mode="L", size=mask_image.size, color=255)
mask.putdata(mask_image.getdata(band=3))
if mask_image.mode == 'L':
mask = mask_image
else:
# Obtain the mask from the transparency channel
mask = Image.new(mode="L", size=mask_image.size, color=255)
mask.putdata(mask_image.getdata(band=3))
if invert:
mask = ImageOps.invert(mask)
return mask
def _txt2mask(self, image:Image, text_mask:list, width, height, fit=True) -> Image:
prompt = text_mask[0]
confidence_level = text_mask[1] if len(text_mask)>1 else 0.5
if self.txt2mask is None:
self.txt2mask = Txt2Mask(device = self.device)
segmented = self.txt2mask.segment(image, prompt)
mask = segmented.to_mask(float(confidence_level))
mask = mask.convert('RGB')
mask = self._fit_image(mask, (width, height)) if fit else self._squeeze_image(mask)
return mask
def _has_transparency(self, image):
if image.info.get("transparency", None) is not None:
return True
@ -969,26 +1008,6 @@ class Generate:
def _has_cuda(self):
return self.device.type == 'cuda'
def _cached_sha256(self,path,data):
dirname = os.path.dirname(path)
basename = os.path.basename(path)
base, _ = os.path.splitext(basename)
hashpath = os.path.join(dirname,base+'.sha256')
if os.path.exists(hashpath) and os.path.getmtime(path) <= os.path.getmtime(hashpath):
with open(hashpath) as f:
hash = f.read()
return hash
print(f'>> Calculating sha256 hash of weights file')
tic = time.time()
sha = hashlib.sha256()
sha.update(data)
hash = sha.hexdigest()
toc = time.time()
print(f'>> sha256 = {hash}','(%4.2fs)' % (toc - tic))
with open(hashpath,'w') as f:
f.write(hash)
return hash
def write_intermediate_images(self,modulus,path):
counter = -1
if not os.path.exists(path):

View File

@ -113,8 +113,8 @@ PRECISION_CHOICES = [
]
# is there a way to pick this up during git commits?
APP_ID = 'lstein/stable-diffusion'
APP_VERSION = 'v1.15'
APP_ID = 'invoke-ai/InvokeAI'
APP_VERSION = 'v2.02'
class ArgFormatter(argparse.RawTextHelpFormatter):
# use defined argument order to display usage
@ -172,6 +172,7 @@ class Args(object):
command = cmd_string.replace("'", "\\'")
try:
elements = shlex.split(command)
elements = [x.replace("\\'","'") for x in elements]
except ValueError:
import sys, traceback
print(traceback.format_exc(), file=sys.stderr)
@ -239,12 +240,19 @@ class Args(object):
switches.append(f'--init_color {a["init_color"]}')
if a['strength'] and a['strength']>0:
switches.append(f'-f {a["strength"]}')
if a['inpaint_replace']:
switches.append(f'--inpaint_replace')
if a['text_mask']:
switches.append(f'-tm {" ".join([str(u) for u in a["text_mask"]])}')
else:
switches.append(f'-A {a["sampler_name"]}')
# gfpgan-specific parameters
if a['gfpgan_strength']:
switches.append(f'-G {a["gfpgan_strength"]}')
# facetool-specific parameters, only print if running facetool
if a['facetool_strength']:
switches.append(f'-G {a["facetool_strength"]}')
switches.append(f'-ft {a["facetool"]}')
if a["facetool"] == "codeformer":
switches.append(f'-cf {a["codeformer_fidelity"]}')
if a['outcrop']:
switches.append(f'-c {" ".join([str(u) for u in a["outcrop"]])}')
@ -262,11 +270,12 @@ class Args(object):
# outpainting parameters
if a['out_direction']:
switches.append(f'-D {" ".join([str(u) for u in a["out_direction"]])}')
# LS: slight semantic drift which needs addressing in the future:
# 1. Variations come out of the stored metadata as a packed string with the keyword "variations"
# 2. However, they come out of the CLI (and probably web) with the keyword "with_variations" and
# in broken-out form. Variation (1) should be changed to comply with (2)
if a['with_variations']:
if a['with_variations'] and len(a['with_variations'])>0:
formatted_variations = ','.join(f'{seed}:{weight}' for seed, weight in (a["with_variations"]))
switches.append(f'-V {formatted_variations}')
if 'variations' in a and len(a['variations'])>0:
@ -360,17 +369,24 @@ class Args(object):
deprecated_group.add_argument('--laion400m')
deprecated_group.add_argument('--weights') # deprecated
model_group.add_argument(
'--conf',
'--config',
'-c',
'-conf',
'-config',
dest='conf',
default='./configs/models.yaml',
help='Path to configuration file for alternate models.',
)
model_group.add_argument(
'--model',
default='stable-diffusion-1.4',
help='Indicates which diffusion model to load. (currently "stable-diffusion-1.4" (default) or "laion400m")',
help='Indicates which diffusion model to load (defaults to "default" stanza in configs/models.yaml)',
)
model_group.add_argument(
'--png_compression','-z',
type=int,
default=6,
choices=range(0,10),
dest='png_compression',
help='level of PNG compression, from 0 (none) to 9 (maximum). Default is 6.'
)
model_group.add_argument(
'--sampler',
@ -405,6 +421,11 @@ class Args(object):
help=f'Set model precision. Defaults to auto selected based on device. Options: {", ".join(PRECISION_CHOICES)}',
default='auto',
)
model_group.add_argument(
'--safety_checker',
action='store_true',
help='Check for and blur potentially NSFW images',
)
file_group.add_argument(
'--from_file',
dest='infile',
@ -424,6 +445,12 @@ class Args(object):
action='store_true',
help='Place images in subdirectories named after the prompt.',
)
render_group.add_argument(
'--fnformat',
default='{prefix}.{seed}.png',
type=str,
help='Overwrite the filename format. You can use any argument as wildcard enclosed in curly braces. Default is {prefix}.{seed}.png',
)
render_group.add_argument(
'--grid',
'-g',
@ -515,7 +542,7 @@ class Args(object):
formatter_class=ArgFormatter,
description=
"""
*Image generation:*
*Image generation*
invoke> a fantastic alien landscape -W576 -H512 -s60 -n4
*postprocessing*
@ -530,6 +557,13 @@ class Args(object):
!history lists all the commands issued during the current session.
!NN retrieves the NNth command from the history
*Model manipulation*
!models -- list models in configs/models.yaml
!switch <model_name> -- switch to model named <model_name>
!import_model path/to/weights/file.ckpt -- adds a model to your config
!edit_model <model_name> -- edit a model's description
!del_model <model_name> -- delete a model
"""
)
render_group = parser.add_argument_group('General rendering')
@ -590,6 +624,12 @@ class Args(object):
type=float,
help='Perlin noise scale (0.0 - 1.0) - add perlin noise to the initialization instead of the usual gaussian noise.',
)
render_group.add_argument(
'--fnformat',
default='{prefix}.{seed}.png',
type=str,
help='Overwrite the filename format. You can use any argument as wildcard enclosed in curly braces. Default is {prefix}.{seed}.png',
)
render_group.add_argument(
'--grid',
'-g',
@ -636,6 +676,21 @@ class Args(object):
dest='hires_fix',
help='Create hires image using img2img to prevent duplicated objects'
)
render_group.add_argument(
'--save_intermediates',
type=int,
default=0,
dest='save_intermediates',
help='Save every nth intermediate image into an "intermediates" directory within the output directory'
)
render_group.add_argument(
'--png_compression','-z',
type=int,
default=6,
choices=range(0,10),
dest='png_compression',
help='level of PNG compression, from 0 (none) to 9 (maximum). Default is 6.'
)
img2img_group.add_argument(
'-I',
'--init_img',
@ -648,6 +703,14 @@ class Args(object):
type=str,
help='Path to input mask for inpainting mode (supersedes width and height)',
)
img2img_group.add_argument(
'-tm',
'--text_mask',
nargs='+',
type=str,
help='Use the clipseg classifier to generate the mask area for inpainting. Provide a description of the area to mask ("a mug"), optionally followed by the confidence level threshold (0-1.0; defaults to 0.5).',
default=None,
)
img2img_group.add_argument(
'--init_color',
type=str,
@ -683,6 +746,13 @@ class Args(object):
metavar=('direction','pixels'),
help='Outcrop the image with one or more direction/pixel pairs: -c top 64 bottom 128 left 64 right 64',
)
img2img_group.add_argument(
'-r',
'--inpaint_replace',
type=float,
default=0.0,
help='when inpainting, adjust how aggressively to replace the part of the picture under the mask, from 0.0 (a gentle merge) to 1.0 (replace entirely)',
)
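# Illustrative invoke> command exercising the switches above (the prompt and the
# image path are placeholders; -tm takes a description plus an optional threshold):
#
#     invoke> a blue ceramic mug -I inputs/table.png -tm "a mug" 0.6 -f 0.75 --inpaint_replace 0.5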
postprocessing_group.add_argument(
'-ft',
'--facetool',
@ -692,6 +762,7 @@ class Args(object):
)
postprocessing_group.add_argument(
'-G',
'--facetool_strength',
'--gfpgan_strength',
type=float,
help='The strength at which to apply the face restoration to the result.',
@ -739,6 +810,12 @@ class Args(object):
action='store_true',
help='Change the model to seamless tiling (circular) mode',
)
special_effects_group.add_argument(
'--seamless_axes',
default=['x', 'y'],
type=list[str],
help='Specify which axes to use circular convolution on.',
)
variation_group.add_argument(
'-v',
'--variation_amount',
@ -753,6 +830,13 @@ class Args(object):
type=str,
help='list of variations to apply, in the format `seed:weight,seed:weight,...'
)
render_group.add_argument(
'--use_mps_noise',
action='store_true',
dest='use_mps_noise',
help='Generate noise the way M1 (MPS) systems do, so the same seed reproduces M1 results'
)
return parser
def format_metadata(**kwargs):
@ -788,8 +872,8 @@ def metadata_dumps(opt,
# remove any image keys not mentioned in RFC #266
rfc266_img_fields = ['type','postprocessing','sampler','prompt','seed','variations','steps',
'cfg_scale','threshold','perlin','step_number','width','height','extra','strength']
'cfg_scale','threshold','perlin','fnformat', 'step_number','width','height','extra','strength',
'init_img','init_mask','facetool','facetool_strength','upscale']
rfc_dict ={}
for item in image_dict.items():
@ -809,11 +893,15 @@ def metadata_dumps(opt,
# 'variations' should always exist and be an array, empty or consisting of {'seed': seed, 'weight': weight} pairs
rfc_dict['variations'] = [{'seed':x[0],'weight':x[1]} for x in opt.with_variations] if opt.with_variations else []
# if variations are present then we need to replace 'seed' with 'orig_seed'
if hasattr(opt,'first_seed'):
rfc_dict['seed'] = opt.first_seed
if opt.init_img:
rfc_dict['type'] = 'img2img'
rfc_dict['strength_steps'] = rfc_dict.pop('strength')
rfc_dict['orig_hash'] = calculate_init_img_hash(opt.init_img)
rfc_dict['sampler'] = 'ddim' # TODO: FIX ME WHEN IMG2IMG SUPPORTS ALL SAMPLERS
rfc_dict['type'] = 'img2img'
rfc_dict['strength_steps'] = rfc_dict.pop('strength')
rfc_dict['orig_hash'] = calculate_init_img_hash(opt.init_img)
rfc_dict['inpaint_replace'] = opt.inpaint_replace
else:
rfc_dict['type'] = 'txt2img'
rfc_dict.pop('strength')
@ -868,7 +956,7 @@ def metadata_loads(metadata) -> list:
for image in images:
# repack the prompt and variations
if 'prompt' in image:
image['prompt'] = ','.join([':'.join([x['prompt'], str(x['weight'])]) for x in image['prompt']])
image['prompt'] = repack_prompt(image['prompt'])
if 'variations' in image:
image['variations'] = ','.join([':'.join([str(x['seed']),str(x['weight'])]) for x in image['variations']])
# fix a bit of semantic drift here
@ -876,12 +964,19 @@ def metadata_loads(metadata) -> list:
opt = Args()
opt._cmd_switches = Namespace(**image)
results.append(opt)
except KeyError as e:
except Exception as e:
import sys, traceback
print('>> badly-formatted metadata',file=sys.stderr)
print('>> could not read metadata',file=sys.stderr)
print(traceback.format_exc(), file=sys.stderr)
return results
def repack_prompt(prompt_list:list)->str:
# in the common case of no weighting syntax, just return the prompt as is
if len(prompt_list) > 1:
return ','.join([':'.join([x['prompt'], str(x['weight'])]) for x in prompt_list])
else:
return prompt_list[0]['prompt']
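# Illustration of what repack_prompt() returns, with made-up values:
#   repack_prompt([{'prompt': 'a castle', 'weight': 1.0}])
#       -> 'a castle'
#   repack_prompt([{'prompt': 'a castle', 'weight': 0.8}, {'prompt': 'at night', 'weight': 0.2}])
#       -> 'a castle:0.8,at night:0.2'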
# image can either be a file path on disk or a base64-encoded
# representation of the file's contents
def calculate_init_img_hash(image_string):
@ -911,17 +1006,17 @@ def sha256(path):
return sha.hexdigest()
def legacy_metadata_load(meta,pathname) -> Args:
opt = Args()
if 'Dream' in meta and len(meta['Dream']) > 0:
dream_prompt = meta['Dream']
opt = Args()
opt.parse_cmd(dream_prompt)
return opt
else: # if nothing else, we can get the seed
match = re.search('\d+\.(\d+)',pathname)
if match:
seed = match.groups()[0]
opt = Args()
opt.seed = seed
return opt
return None
else:
opt.prompt = ''
opt.seed = 0
return opt

View File

@ -5,26 +5,30 @@ including img2img, txt2img, and inpaint
import torch
import numpy as np
import random
import os
from tqdm import tqdm, trange
from PIL import Image
from PIL import Image, ImageFilter
from einops import rearrange, repeat
from pytorch_lightning import seed_everything
from ldm.invoke.devices import choose_autocast
from ldm.util import rand_perlin_2d
downsampling = 8
CAUTION_IMG = 'assets/caution.png'
class Generator():
def __init__(self, model, precision):
self.model = model
self.precision = precision
self.seed = None
self.latent_channels = model.channels
self.model = model
self.precision = precision
self.seed = None
self.latent_channels = model.channels
self.downsampling_factor = downsampling # BUG: should come from model or config
self.perlin = 0.0
self.threshold = 0
self.variation_amount = 0
self.with_variations = []
self.safety_checker = None
self.perlin = 0.0
self.threshold = 0
self.variation_amount = 0
self.with_variations = []
self.use_mps_noise = False
# this is going to be overridden in img2img.py, txt2img.py and inpaint.py
def get_make_image(self,prompt,**kwargs):
@ -41,8 +45,10 @@ class Generator():
def generate(self,prompt,init_image,width,height,iterations=1,seed=None,
image_callback=None, step_callback=None, threshold=0.0, perlin=0.0,
safety_checker:dict=None,
**kwargs):
scope = choose_autocast(self.precision)
self.safety_checker = safety_checker
make_image = self.get_make_image(
prompt,
init_image = init_image,
@ -76,10 +82,17 @@ class Generator():
pass
image = make_image(x_T)
if self.safety_checker is not None:
image = self.safety_check(image)
results.append([image, seed])
if image_callback is not None:
image_callback(image, seed, first_seed=first_seed)
seed = self.new_seed()
return results
def sample_to_image(self,samples):
@ -168,3 +181,47 @@ class Generator():
return v2
def safety_check(self,image:Image.Image):
'''
If the CompVis safety checker flags an NSFW image, we
blur it out.
'''
import diffusers
checker = self.safety_checker['checker']
extractor = self.safety_checker['extractor']
features = extractor([image], return_tensors="pt")
# unfortunately checker requires the numpy version, so we have to convert back
x_image = np.array(image).astype(np.float32) / 255.0
x_image = x_image[None].transpose(0, 3, 1, 2)
diffusers.logging.set_verbosity_error()
checked_image, has_nsfw_concept = checker(images=x_image, clip_input=features.pixel_values)
if has_nsfw_concept[0]:
print('** An image with potential non-safe content has been detected. A blurred image will be returned. **')
return self.blur(image)
else:
return image
def blur(self,input):
blurry = input.filter(filter=ImageFilter.GaussianBlur(radius=32))
try:
caution = Image.open(CAUTION_IMG)
caution = caution.resize((caution.width // 2, caution.height //2))
blurry.paste(caution,(0,0),caution)
except FileNotFoundError:
pass
return blurry
# this is a handy routine for debugging use. Given a generated sample,
# convert it into a PNG image and store it at the indicated path
def save_sample(self, sample, filepath):
image = self.sample_to_image(sample)
dirname = os.path.dirname(filepath) or '.'
if not os.path.exists(dirname):
print(f'** creating directory {dirname}')
os.makedirs(dirname, exist_ok=True)
image.save(filepath,'PNG')

View File

@ -4,9 +4,12 @@ ldm.invoke.generator.img2img descends from ldm.invoke.generator
import torch
import numpy as np
from ldm.invoke.devices import choose_autocast
from ldm.invoke.generator.base import Generator
from ldm.models.diffusion.ddim import DDIMSampler
import PIL
from torch import Tensor
from PIL import Image
from ldm.invoke.devices import choose_autocast
from ldm.invoke.generator.base import Generator
from ldm.models.diffusion.ddim import DDIMSampler
class Img2Img(Generator):
def __init__(self, model, precision):
@ -25,6 +28,9 @@ class Img2Img(Generator):
ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False
)
if isinstance(init_image, PIL.Image.Image):
init_image = self._image_to_tensor(init_image)
scope = choose_autocast(self.precision)
with scope(self.model.device.type):
self.init_latent = self.model.get_first_stage_encoding(
@ -68,3 +74,11 @@ class Img2Img(Generator):
shape = init_latent.shape
x = (1-self.perlin)*x + self.perlin*self.get_perlin_noise(shape[3], shape[2])
return x
def _image_to_tensor(self, image:Image, normalize:bool=True)->Tensor:
image = np.array(image).astype(np.float32) / 255.0
image = image[None].transpose(0, 3, 1, 2)
image = torch.from_numpy(image)
if normalize:
image = 2.0 * image - 1.0
return image.to(self.model.device)

View File

@ -3,27 +3,55 @@ ldm.invoke.generator.inpaint descends from ldm.invoke.generator
'''
import torch
import torchvision.transforms as T
import numpy as np
import cv2 as cv
import PIL
from PIL import Image, ImageFilter
from skimage.exposure.histogram_matching import match_histograms
from einops import rearrange, repeat
from ldm.invoke.devices import choose_autocast
from ldm.invoke.generator.img2img import Img2Img
from ldm.models.diffusion.ddim import DDIMSampler
from ldm.models.diffusion.ksampler import KSampler
from ldm.invoke.generator.base import downsampling
class Inpaint(Img2Img):
def __init__(self, model, precision):
self.init_latent = None
self.pil_image = None
self.pil_mask = None
self.mask_blur_radius = 0
super().__init__(model, precision)
@torch.no_grad()
def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta,
conditioning,init_image,mask_image,strength,
step_callback=None,**kwargs):
mask_blur_radius: int = 8,
step_callback=None,inpaint_replace=False, **kwargs):
"""
Returns a function returning an image derived from the prompt and
the initial image + mask. Return value depends on the seed at
the time you call it. kwargs are 'init_latent' and 'strength'
"""
if isinstance(init_image, PIL.Image.Image):
self.pil_image = init_image
init_image = self._image_to_tensor(init_image)
if isinstance(mask_image, PIL.Image.Image):
self.pil_mask = mask_image
mask_image = mask_image.resize(
(
mask_image.width // downsampling,
mask_image.height // downsampling
),
resample=Image.Resampling.NEAREST
)
mask_image = self._image_to_tensor(mask_image,normalize=False)
self.mask_blur_radius = mask_blur_radius
# klms samplers not supported yet, so ignore previous sampler
if isinstance(sampler,KSampler):
print(
@ -58,6 +86,14 @@ class Inpaint(Img2Img):
noise=x_T
)
# to replace masked area with latent noise, weighted by inpaint_replace strength
if inpaint_replace > 0.0:
print(f'>> inpaint will replace what was under the mask with a strength of {inpaint_replace}')
l_noise = self.get_noise(kwargs['width'],kwargs['height'])
inverted_mask = 1.0-mask_image # there will be 1s where the mask is
masked_region = (1.0-inpaint_replace) * inverted_mask * z_enc + inpaint_replace * inverted_mask * l_noise
z_enc = z_enc * mask_image + masked_region
# decode it
samples = sampler.decode(
z_enc,
@ -69,10 +105,50 @@ class Inpaint(Img2Img):
mask = mask_image,
init_latent = self.init_latent
)
return self.sample_to_image(samples)
return make_image
def sample_to_image(self, samples)->Image.Image:
gen_result = super().sample_to_image(samples).convert('RGB')
if self.pil_image is None or self.pil_mask is None:
return gen_result
pil_mask = self.pil_mask
pil_image = self.pil_image
mask_blur_radius = self.mask_blur_radius
# Get the original alpha channel of the mask if there is one.
# Otherwise it is some other black/white image format ('1', 'L' or 'RGB')
pil_init_mask = pil_mask.getchannel('A') if pil_mask.mode == 'RGBA' else pil_mask.convert('L')
pil_init_image = pil_image.convert('RGBA') # Add an alpha channel if one doesn't exist
# Build an image with only visible pixels from source to use as reference for color-matching.
# Note that this doesn't use the mask, which would exclude some source image pixels from the
# histogram and cause slight color changes.
init_rgb_pixels = np.asarray(pil_image.convert('RGB'), dtype=np.uint8).reshape(pil_image.width * pil_image.height, 3)
init_a_pixels = np.asarray(pil_init_image.getchannel('A'), dtype=np.uint8).reshape(pil_init_mask.width * pil_init_mask.height)
init_rgb_pixels = init_rgb_pixels[init_a_pixels > 0]
init_rgb_pixels = init_rgb_pixels.reshape(1, init_rgb_pixels.shape[0], init_rgb_pixels.shape[1]) # Filter to just pixels that have any alpha, this is now our histogram
# Get numpy version
np_gen_result = np.asarray(gen_result, dtype=np.uint8)
# Color correct
np_matched_result = match_histograms(np_gen_result, init_rgb_pixels, channel_axis=-1)
matched_result = Image.fromarray(np_matched_result, mode='RGB')
# Blur the mask out (into init image) by specified amount
if mask_blur_radius > 0:
nm = np.asarray(pil_init_mask, dtype=np.uint8)
nmd = cv.erode(nm, kernel=np.ones((3,3), dtype=np.uint8), iterations=int(mask_blur_radius / 2))
pmd = Image.fromarray(nmd, mode='L')
blurred_init_mask = pmd.filter(ImageFilter.BoxBlur(mask_blur_radius))
else:
blurred_init_mask = pil_init_mask
# Paste original on color-corrected generation (using blurred mask)
matched_result.paste(pil_image, (0,0), mask = blurred_init_mask)
return matched_result

View File

@ -59,7 +59,7 @@ class Txt2Img(Generator):
# returns a tensor filled with random numbers from a normal distribution
def get_noise(self,width,height):
device = self.model.device
if device.type == 'mps':
if self.use_mps_noise or device.type == 'mps':
x = torch.randn([1,
self.latent_channels,
height // self.downsampling_factor,
@ -74,3 +74,4 @@ class Txt2Img(Generator):
if self.perlin > 0.0:
x = (1-self.perlin)*x + self.perlin*self.get_perlin_noise(width // self.downsampling_factor, height // self.downsampling_factor)
return x

View File

@ -118,7 +118,7 @@ class Txt2Img2Img(Generator):
scaled_height = height
device = self.model.device
if device.type == 'mps':
if self.use_mps_noise or device.type == 'mps':
return torch.randn([1,
self.latent_channels,
scaled_height // self.downsampling_factor,

355
ldm/invoke/model_cache.py Normal file
View File

@ -0,0 +1,355 @@
'''
Manage a cache of Stable Diffusion model files for fast switching.
They are moved between GPU and CPU as necessary. If CPU memory falls
below a preset minimum, the least recently used model will be
cleared and loaded from disk when next needed.
'''
import torch
import os
import io
import time
import gc
import hashlib
import psutil
import transformers
from sys import getrefcount
from omegaconf import OmegaConf
from omegaconf.errors import ConfigAttributeError
from ldm.util import instantiate_from_config
GIGS=2**30
AVG_MODEL_SIZE=2.1*GIGS
DEFAULT_MIN_AVAIL=2*GIGS
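# Illustrative arithmetic, assuming the defaults above are unchanged: _check_memory()
# evicts the least recently used model once free system RAM drops below
# AVG_MODEL_SIZE + min_avail_mem = 2.1 GiB + 2 GiB, i.e. roughly 4.1 GiB.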
class ModelCache(object):
def __init__(self, config:OmegaConf, device_type:str, precision:str, min_avail_mem=DEFAULT_MIN_AVAIL):
'''
Initialize with the path to the models.yaml config file,
the torch device type, and precision. The optional
min_avail_mem argument specifies how much unused system
(CPU) memory to preserve. The cache of models in RAM will
grow until this value is approached. Default is 2G.
'''
# prevent nasty-looking CLIP log message
transformers.logging.set_verbosity_error()
self.config = config
self.precision = precision
self.device = torch.device(device_type)
self.min_avail_mem = min_avail_mem
self.models = {}
self.stack = [] # this is an LRU FIFO
self.current_model = None
def get_model(self, model_name:str):
'''
Given the name of a model identified in models.yaml, return
the model object. If it is cached in RAM it will be moved into GPU VRAM;
if it is only on disk, it will be loaded from there.
'''
if model_name not in self.config:
print(f'** "{model_name}" is not a known model name. Please check your models.yaml file')
return None
if self.current_model != model_name:
self.unload_model(self.current_model)
if model_name in self.models:
requested_model = self.models[model_name]['model']
print(f'>> Retrieving model {model_name} from system RAM cache')
self.models[model_name]['model'] = self._model_from_cpu(requested_model)
width = self.models[model_name]['width']
height = self.models[model_name]['height']
hash = self.models[model_name]['hash']
else:
self._check_memory()
try:
requested_model, width, height, hash = self._load_model(model_name)
self.models[model_name] = {}
self.models[model_name]['model'] = requested_model
self.models[model_name]['width'] = width
self.models[model_name]['height'] = height
self.models[model_name]['hash'] = hash
except Exception as e:
print(f'** model {model_name} could not be loaded: {str(e)}')
print(f'** restoring {self.current_model}')
self.get_model(self.current_model)
return None
self.current_model = model_name
self._push_newest_model(model_name)
return {
'model':requested_model,
'width':width,
'height':height,
'hash': hash
}
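# Usage sketch (the config path, device and model name are assumptions); this
# mirrors how Generate.set_model() consumes the returned dict:
#
#     from omegaconf import OmegaConf
#     cache = ModelCache(OmegaConf.load('configs/models.yaml'), 'cuda', 'float16')
#     data  = cache.get_model(cache.default_model() or 'stable-diffusion-1.4')
#     if data:
#         model, width, height = data['model'], data['width'], data['height']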
def default_model(self) -> str:
'''
Returns the name of the default model, or None
if none is defined.
'''
for model_name in self.config:
if self.config[model_name].get('default',False):
return model_name
return None
def set_default_model(self,model_name:str):
'''
Set the default model. The change will not take
effect until you call model_cache.commit()
'''
assert model_name in self.models,f"unknown model '{model_name}'"
for model in self.models:
self.models[model].pop('default',None)
self.models[model_name]['default'] = True
def list_models(self) -> dict:
'''
Return a dict of models in the format:
{ model_name1: {'status': ('active'|'cached'|'not loaded'),
'description': description,
},
model_name2: { etc },
}
'''
result = {}
for name in self.config:
try:
description = self.config[name].description
except ConfigAttributeError:
description = '<no description>'
if self.current_model == name:
status = 'active'
elif name in self.models:
status = 'cached'
else:
status = 'not loaded'
result[name]={}
result[name]['status']=status
result[name]['description']=description
return result
def print_models(self):
'''
Print a table of models, their descriptions, and load status
'''
models = self.list_models()
for name in models:
line = f'{name:25s} {models[name]["status"]:>10s} {models[name]["description"]}'
if models[name]['status'] == 'active':
print(f'\033[1m{line}\033[0m')
else:
print(line)
def del_model(self, model_name:str) ->bool:
'''
Delete the named model.
'''
omega = self.config
del omega[model_name]
if model_name in self.stack:
self.stack.remove(model_name)
return True
def add_model(self, model_name:str, model_attributes:dict, clobber=False) ->True:
'''
Update the named model with a dictionary of attributes. Will fail with an
assertion error if the model name already exists (pass clobber=True to
overwrite) or if any required attribute is missing. On a successful update,
the config is changed in memory and the method returns True.
'''
omega = self.config
# check that all the required fields are present
for field in ('description','weights','height','width','config'):
assert field in model_attributes, f'required field {field} is missing'
assert (clobber or model_name not in omega), f'attempt to overwrite existing model definition "{model_name}"'
config = omega[model_name] if model_name in omega else {}
for field in model_attributes:
config[field] = model_attributes[field]
omega[model_name] = config
if clobber:
self._invalidate_cached_model(model_name)
return True
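# Minimal sketch of the attribute dict add_model() expects; the model name, paths
# and sizes below are placeholders, not values taken from the repository:
#   cache.add_model('my-model', {
#       'description': 'fine-tuned checkpoint',
#       'weights': 'models/ldm/my-model/model.ckpt',
#       'config': 'configs/stable-diffusion/v1-inference.yaml',
#       'width': 512,
#       'height': 512,
#   })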
def _check_memory(self):
avail_memory = psutil.virtual_memory()[1]
if AVG_MODEL_SIZE + self.min_avail_mem > avail_memory:
least_recent_model = self._pop_oldest_model()
if least_recent_model is not None:
del self.models[least_recent_model]
gc.collect()
def _load_model(self, model_name:str):
"""Load and initialize the model from configuration variables passed at object creation time"""
if model_name not in self.config:
print(f'"{model_name}" is not a known model name. Please check your models.yaml file')
return None
mconfig = self.config[model_name]
config = mconfig.config
weights = mconfig.weights
vae = mconfig.get('vae',None)
width = mconfig.width
height = mconfig.height
print(f'>> Loading {model_name} from {weights}')
# for usage statistics
if self._has_cuda():
torch.cuda.reset_peak_memory_stats()
torch.cuda.empty_cache()
tic = time.time()
# this does the work
c = OmegaConf.load(config)
with open(weights,'rb') as f:
weight_bytes = f.read()
model_hash = self._cached_sha256(weights,weight_bytes)
pl_sd = torch.load(io.BytesIO(weight_bytes), map_location='cpu')
del weight_bytes
sd = pl_sd['state_dict']
model = instantiate_from_config(c.model)
m, u = model.load_state_dict(sd, strict=False)
if self.precision == 'float16':
print(' | Using faster float16 precision')
model.to(torch.float16)
else:
print(' | Using more accurate float32 precision')
# look for and load a matching VAE file. Code borrowed from AUTOMATIC1111 modules/sd_models.py
if vae and os.path.exists(vae):
print(f' | Loading VAE weights from: {vae}')
vae_ckpt = torch.load(vae, map_location="cpu")
vae_dict = {k: v for k, v in vae_ckpt["state_dict"].items() if k[0:4] != "loss"}
model.first_stage_model.load_state_dict(vae_dict, strict=False)
model.to(self.device)
# model.to doesn't change the cond_stage_model.device used to move the tokenizer output, so set it here
model.cond_stage_model.device = self.device
model.eval()
for m in model.modules():
if isinstance(m, (torch.nn.Conv2d, torch.nn.ConvTranspose2d)):
m._orig_padding_mode = m.padding_mode
# usage statistics
toc = time.time()
print(f'>> Model loaded in', '%4.2fs' % (toc - tic))
if self._has_cuda():
print(
'>> Max VRAM used to load the model:',
'%4.2fG' % (torch.cuda.max_memory_allocated() / 1e9),
'\n>> Current VRAM usage:'
'%4.2fG' % (torch.cuda.memory_allocated() / 1e9),
)
return model, width, height, model_hash
def unload_model(self, model_name:str):
if model_name not in self.models:
return
print(f'>> Caching model {model_name} in system RAM')
model = self.models[model_name]['model']
self.models[model_name]['model'] = self._model_to_cpu(model)
gc.collect()
if self._has_cuda():
torch.cuda.empty_cache()
def commit(self,config_file_path:str):
'''
Write current configuration out to the indicated file.
'''
yaml_str = OmegaConf.to_yaml(self.config)
tmpfile = os.path.join(os.path.dirname(config_file_path),'new_config.tmp')
with open(tmpfile, 'w') as outfile:
outfile.write(self.preamble())
outfile.write(yaml_str)
os.rename(tmpfile,config_file_path)
def preamble(self):
'''
Returns the preamble for the config file.
'''
return '''# This file describes the alternative machine learning models
# available to the dream script.
#
# To add a new model, follow the examples below. Each
# model requires a model config file, a weights file,
# and the width and height of the images it
# was trained on.
'''
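# For illustration, a typical models.yaml stanza (field names follow add_model()
# above; the model name and file paths are assumptions, not taken from the repository):
#   stable-diffusion-1.4:
#       description: Stable Diffusion v1.4
#       config: configs/stable-diffusion/v1-inference.yaml
#       weights: models/ldm/stable-diffusion-v1/model.ckpt
#       width: 512
#       height: 512
#       default: true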
def _invalidate_cached_model(self,model_name:str):
self.unload_model(model_name)
if model_name in self.stack:
self.stack.remove(model_name)
self.models.pop(model_name,None)
def _model_to_cpu(self,model):
if self.device != 'cpu':
model.cond_stage_model.device = 'cpu'
model.first_stage_model.to('cpu')
model.cond_stage_model.to('cpu')
model.model.to('cpu')
return model.to('cpu')
else:
return model
def _model_from_cpu(self,model):
if self.device != 'cpu':
model.to(self.device)
model.first_stage_model.to(self.device)
model.cond_stage_model.to(self.device)
model.cond_stage_model.device = self.device
return model
def _pop_oldest_model(self):
'''
Remove the first element of the FIFO, which ought
to be the least recently accessed model. Do not
pop the last one, because it is in active use!
'''
if len(self.stack) > 1:
return self.stack.pop(0)
def _push_newest_model(self,model_name:str):
'''
Maintain a simple FIFO. First element is always the
least recent, and last element is always the most recent.
'''
try:
self.stack.remove(model_name)
except ValueError:
pass
self.stack.append(model_name)
def _has_cuda(self):
return self.device.type == 'cuda'
def _cached_sha256(self,path,data):
dirname = os.path.dirname(path)
basename = os.path.basename(path)
base, _ = os.path.splitext(basename)
hashpath = os.path.join(dirname,base+'.sha256')
if os.path.exists(hashpath) and os.path.getmtime(path) <= os.path.getmtime(hashpath):
with open(hashpath) as f:
hash = f.read()
return hash
print(f'>> Calculating sha256 hash of weights file')
tic = time.time()
sha = hashlib.sha256()
sha.update(data)
hash = sha.hexdigest()
toc = time.time()
print(f'>> sha256 = {hash}','(%4.2fs)' % (toc - tic))
with open(hashpath,'w') as f:
f.write(hash)
return hash

@ -33,13 +33,13 @@ class PngWriter:
# saves image named _image_ to outdir/name, writing metadata from prompt
# returns full path of output
def save_image_and_prompt_to_png(self, image, dream_prompt, name, metadata=None):
def save_image_and_prompt_to_png(self, image, dream_prompt, name, metadata=None, compress_level=6):
path = os.path.join(self.outdir, name)
info = PngImagePlugin.PngInfo()
info.add_text('Dream', dream_prompt)
if metadata:
info.add_text('sd-metadata', json.dumps(metadata))
image.save(path, 'PNG', pnginfo=info)
info.add_text('sd-metadata', json.dumps(metadata))
image.save(path, 'PNG', pnginfo=info, compress_level=compress_level)
return path
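# compress_level is passed straight through to Pillow's PNG writer: 0 means no
# compression (fastest, largest files), 9 is maximum compression, and 6 keeps
# Pillow's default behaviour.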
def retrieve_metadata(self,img_basename):

@ -21,6 +21,9 @@ except (ImportError,ModuleNotFoundError):
readline_available = False
IMG_EXTENSIONS = ('.png','.jpg','.jpeg','.PNG','.JPG','.JPEG','.gif','.GIF')
WEIGHT_EXTENSIONS = ('.ckpt','.bae')
TEXT_EXTENSIONS = ('.txt','.TXT')
CONFIG_EXTENSIONS = ('.yaml','.yml')
COMMANDS = (
'--steps','-s',
'--seed','-S',
@ -31,6 +34,7 @@ COMMANDS = (
'--perlin',
'--grid','-g',
'--individual','-i',
'--save_intermediates',
'--init_img','-I',
'--init_mask','-M',
'--init_color',
@ -41,35 +45,58 @@ COMMANDS = (
'--embedding_path',
'--device',
'--grid','-g',
'--gfpgan_strength','-G',
'--facetool','-ft',
'--facetool_strength','-G',
'--codeformer_fidelity','-cf',
'--upscale','-U',
'-save_orig','--save_original',
'--skip_normalize','-x',
'--log_tokenization','-t',
'--hires_fix',
'!fix','!fetch','!history','!search','!clear',
'--inpaint_replace','-r',
'--png_compression','-z',
'--text_mask','-tm',
'!fix','!fetch','!replay','!history','!search','!clear',
'!models','!switch','!import_model','!edit_model','!del_model',
'!mask',
)
MODEL_COMMANDS = (
'!switch',
'!edit_model',
'!del_model',
)
WEIGHT_COMMANDS = (
'!import_model',
)
IMG_PATH_COMMANDS = (
'--outdir[=\s]',
)
TEXT_PATH_COMMANDS=(
'!replay',
)
IMG_FILE_COMMANDS=(
'!fix',
'!fetch',
'!mask',
'--init_img[=\s]','-I',
'--init_mask[=\s]','-M',
'--init_color[=\s]',
'--embedding_path[=\s]',
)
path_regexp = '('+'|'.join(IMG_PATH_COMMANDS+IMG_FILE_COMMANDS) + ')\s*\S*$'
path_regexp = '(' + '|'.join(IMG_PATH_COMMANDS+IMG_FILE_COMMANDS) + ')\s*\S*$'
weight_regexp = '(' + '|'.join(WEIGHT_COMMANDS) + ')\s*\S*$'
text_regexp = '(' + '|'.join(TEXT_PATH_COMMANDS) + ')\s*\S*$'
class Completer(object):
def __init__(self, options):
def __init__(self, options, models=[]):
self.options = sorted(options)
self.models = sorted(models)
self.seeds = set()
self.matches = list()
self.default_dir = None
self.linebuffer = None
self.auto_history_active = True
self.extensions = None
return
def complete(self, text, state):
@ -80,7 +107,13 @@ class Completer(object):
buffer = readline.get_line_buffer()
if state == 0:
if re.search(path_regexp,buffer):
# extensions defined, so go directly into path completion mode
if self.extensions is not None:
self.matches = self._path_completions(text, state, self.extensions)
# looking for an image file
elif re.search(path_regexp,buffer):
do_shortcut = re.search('^'+'|'.join(IMG_FILE_COMMANDS),buffer)
self.matches = self._path_completions(text, state, IMG_EXTENSIONS,shortcut_ok=do_shortcut)
@ -88,6 +121,16 @@ class Completer(object):
elif re.search('(-S\s*|--seed[=\s])\d*$',buffer):
self.matches= self._seed_completions(text,state)
# looking for a model
elif re.match('^'+'|'.join(MODEL_COMMANDS),buffer):
self.matches= self._model_completions(text, state)
elif re.search(weight_regexp,buffer):
self.matches = self._path_completions(text, state, WEIGHT_EXTENSIONS)
elif re.search(text_regexp,buffer):
self.matches = self._path_completions(text, state, TEXT_EXTENSIONS)
# This is the first time for this text, so build a match list.
elif text:
self.matches = [
@ -104,6 +147,13 @@ class Completer(object):
response = None
return response
def complete_extensions(self, extensions:list):
'''
If called with a list of extensions, will force completer
to do file path completions.
'''
self.extensions=extensions
def add_history(self,line):
'''
Pass thru to readline
@ -169,9 +219,24 @@ class Completer(object):
pydoc.pager('\n'.join(lines))
def set_line(self,line)->None:
'''
Set the default string displayed in the next line of input.
'''
self.linebuffer = line
readline.redisplay()
def add_model(self,model_name:str)->None:
'''
add a model name to the completion list
'''
self.models.append(model_name)
def del_model(self,model_name:str)->None:
'''
removes a model name from the completion list
'''
self.models.remove(model_name)
def _seed_completions(self, text, state):
m = re.search('(-S\s?|--seed[=\s]?)(\d*)',text)
if m:
@ -188,6 +253,21 @@ class Completer(object):
matches.sort()
return matches
def _model_completions(self, text, state):
m = re.search('(!switch\s+)(\w*)',text)
if m:
switch = m.groups()[0]
partial = m.groups()[1]
else:
switch = ''
partial = text
matches = list()
for s in self.models:
if s.startswith(partial):
matches.append(switch+s)
matches.sort()
return matches
def _pre_input_hook(self):
if self.linebuffer:
readline.insert_text(self.linebuffer)
@ -266,9 +346,9 @@ class DummyCompleter(Completer):
def set_line(self,line):
print(f'# {line}')
def get_completer(opt:Args)->Completer:
def get_completer(opt:Args, models=[])->Completer:
if readline_available:
completer = Completer(COMMANDS)
completer = Completer(COMMANDS,models)
readline.set_completer(
completer.complete

@ -41,10 +41,12 @@ class CodeFormerRestoration():
cf.eval()
image = image.convert('RGB')
# Codeformer expects a BGR np array; make array and flip channels
bgr_image_array = np.array(image, dtype=np.uint8)[...,::-1]
face_helper = FaceRestoreHelper(upscale_factor=1, use_parse=True, device=device)
face_helper.clean_all()
face_helper.read_image(np.array(image, dtype=np.uint8))
face_helper.read_image(bgr_image_array)
face_helper.get_face_landmarks_5(resize=640, eye_dist_threshold=5)
face_helper.align_warp_face()
@ -71,7 +73,8 @@ class CodeFormerRestoration():
restored_img = face_helper.paste_faces_to_input_image()
res = Image.fromarray(restored_img)
# Flip the channels back to RGB
res = Image.fromarray(restored_img[...,::-1])
if strength < 1.0:
# Resize the image to the new image if the sizes have changed

@ -55,13 +55,18 @@ class GFPGAN():
image = image.convert('RGB')
# GFPGAN expects a BGR np array; make array and flip channels
bgr_image_array = np.array(image, dtype=np.uint8)[...,::-1]
_, _, restored_img = self.gfpgan.enhance(
np.array(image, dtype=np.uint8),
bgr_image_array,
has_aligned=False,
only_center_face=False,
paste_back=True,
)
res = Image.fromarray(restored_img)
# Flip the channels back to RGB
res = Image.fromarray(restored_img[...,::-1])
if strength < 1.0:
# Resize the image to the new image if the sizes have changed

@ -60,14 +60,18 @@ class ESRGAN():
print(
f'>> Real-ESRGAN Upscaling seed:{seed} : scale:{upsampler_scale}x'
)
# REALSRGAN expects a BGR np array; make array and flip channels
bgr_image_array = np.array(image, dtype=np.uint8)[...,::-1]
output, _ = upsampler.enhance(
np.array(image, dtype=np.uint8),
bgr_image_array,
outscale=upsampler_scale,
alpha_upsampler='realesrgan',
)
res = Image.fromarray(output)
# Flip the channels back to RGB
res = Image.fromarray(output[...,::-1])
if strength < 1.0:
# Resize the image to the new image if the sizes have changed

ldm/invoke/seamless.py Normal file
@ -0,0 +1,30 @@
import torch.nn as nn
def _conv_forward_asymmetric(self, input, weight, bias):
"""
Patch for Conv2d._conv_forward that supports asymmetric padding
"""
working = nn.functional.pad(input, self.asymmetric_padding['x'], mode=self.asymmetric_padding_mode['x'])
working = nn.functional.pad(working, self.asymmetric_padding['y'], mode=self.asymmetric_padding_mode['y'])
return nn.functional.conv2d(working, weight, bias, self.stride, nn.modules.utils._pair(0), self.dilation, self.groups)
def configure_model_padding(model, seamless, seamless_axes):
"""
Modifies the 2D convolution layers to use a circular padding mode based on the `seamless` and `seamless_axes` options.
"""
for m in model.modules():
if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
if seamless:
m.asymmetric_padding_mode = {}
m.asymmetric_padding = {}
m.asymmetric_padding_mode['x'] = 'circular' if ('x' in seamless_axes) else 'constant'
m.asymmetric_padding['x'] = (m._reversed_padding_repeated_twice[0], m._reversed_padding_repeated_twice[1], 0, 0)
m.asymmetric_padding_mode['y'] = 'circular' if ('y' in seamless_axes) else 'constant'
m.asymmetric_padding['y'] = (0, 0, m._reversed_padding_repeated_twice[2], m._reversed_padding_repeated_twice[3])
m._conv_forward = _conv_forward_asymmetric.__get__(m, nn.Conv2d)
else:
m._conv_forward = nn.Conv2d._conv_forward.__get__(m, nn.Conv2d)
if hasattr(m, 'asymmetric_padding_mode'):
del m.asymmetric_padding_mode
if hasattr(m, 'asymmetric_padding'):
del m.asymmetric_padding
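# A minimal usage sketch (assumed variable names; not part of the original file):
# enable circular padding on the x axis only, so generated images tile horizontally.
#   from ldm.invoke.seamless import configure_model_padding
#   configure_model_padding(model, seamless=True, seamless_axes=['x'])
#   # ...generate images...
#   configure_model_padding(model, seamless=False, seamless_axes=[])  # restore normal padding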

@ -31,12 +31,13 @@ def build_opt(post_data, seed, gfpgan_model_exists):
setattr(opt, 'embiggen', None)
setattr(opt, 'embiggen_tiles', None)
setattr(opt, 'gfpgan_strength', float(post_data['gfpgan_strength']) if gfpgan_model_exists else 0)
setattr(opt, 'facetool_strength', float(post_data['facetool_strength']) if gfpgan_model_exists else 0)
setattr(opt, 'upscale', [int(post_data['upscale_level']), float(post_data['upscale_strength'])] if post_data['upscale_level'] != '' else None)
setattr(opt, 'progress_images', 'progress_images' in post_data)
setattr(opt, 'seed', None if int(post_data['seed']) == -1 else int(post_data['seed']))
setattr(opt, 'threshold', float(post_data['threshold']))
setattr(opt, 'perlin', float(post_data['perlin']))
setattr(opt, 'hires_fix', 'hires_fix' in post_data)
setattr(opt, 'variation_amount', float(post_data['variation_amount']) if int(post_data['seed']) != -1 else 0)
setattr(opt, 'with_variations', [])
setattr(opt, 'embiggen', None)
@ -196,7 +197,7 @@ class DreamServer(BaseHTTPRequestHandler):
) + '\n',"utf-8"))
# control state of the "postprocessing..." message
upscaling_requested = opt.upscale or opt.gfpgan_strength > 0
upscaling_requested = opt.upscale or opt.facetool_strength > 0
nonlocal images_generated # NB: Is this bad python style? It is typical usage in a perl closure.
nonlocal images_upscaled # NB: Is this bad python style? It is typical usage in a perl closure.
if upscaled:

ldm/invoke/txt2mask.py Normal file
@ -0,0 +1,131 @@
'''Makes available the Txt2Mask class, which assists in the automatic
assignment of masks via text prompt using clipseg.
Here is typical usage:
from ldm.invoke.txt2mask import Txt2Mask, SegmentedGrayscale
from PIL import Image
txt2mask = Txt2Mask(self.device)
segmented = txt2mask.segment(Image.open('/path/to/img.png'),'a bagel')
# this will return a grayscale Image of the segmented data
grayscale = segmented.to_grayscale()
# this will return a semi-transparent image in which the
# selected object(s) are opaque and the rest is at various
# levels of transparency
transparent = segmented.to_transparent()
# this will return a masked image suitable for use in inpainting:
mask = segmented.to_mask(threshold=0.5)
The threshold used in the call to to_mask() selects pixels for use in
the mask that exceed the indicated confidence threshold. Values range
from 0.0 to 1.0. The higher the threshold, the more confident the
algorithm is. In limited testing, I have found that values around 0.5
work fine.
'''
import torch
import numpy as np
from clipseg_models.clipseg import CLIPDensePredT
from einops import rearrange, repeat
from PIL import Image, ImageOps
from torchvision import transforms
CLIP_VERSION = 'ViT-B/16'
CLIPSEG_WEIGHTS = 'src/clipseg/weights/rd64-uni.pth'
CLIPSEG_SIZE = 352
class SegmentedGrayscale(object):
def __init__(self, image:Image, heatmap:torch.Tensor):
self.heatmap = heatmap
self.image = image
def to_grayscale(self)->Image:
return self._rescale(Image.fromarray(np.uint8(self.heatmap*255)))
def to_mask(self,threshold:float=0.5)->Image:
discrete_heatmap = self.heatmap.lt(threshold).int()
return self._rescale(Image.fromarray(np.uint8(discrete_heatmap*255),mode='L'))
def to_transparent(self,invert:bool=False)->Image:
transparent_image = self.image.copy()
gs = self.to_grayscale()
# The following line looks like a bug, but isn't.
# For img2img, we want the selected regions to be transparent,
# but to_grayscale() returns the opposite.
gs = ImageOps.invert(gs) if not invert else gs
transparent_image.putalpha(gs)
return transparent_image
# unscales and uncrops the 352x352 heatmap so that it matches the image again
def _rescale(self, heatmap:Image)->Image:
size = self.image.width if (self.image.width > self.image.height) else self.image.height
resized_image = heatmap.resize(
(size,size),
resample=Image.Resampling.LANCZOS
)
return resized_image.crop((0,0,self.image.width,self.image.height))
class Txt2Mask(object):
'''
Create new Txt2Mask object. The optional device argument can be one of
'cuda', 'mps' or 'cpu'.
'''
def __init__(self,device='cpu'):
print('>> Initializing clipseg model for text to mask inference')
self.device = device
self.model = CLIPDensePredT(version=CLIP_VERSION, reduce_dim=64, )
self.model.eval()
# initially we keep everything in cpu to conserve space
self.model.to('cpu')
self.model.load_state_dict(torch.load(CLIPSEG_WEIGHTS, map_location=torch.device('cpu')), strict=False)
@torch.no_grad()
def segment(self, image, prompt:str) -> SegmentedGrayscale:
'''
Given a prompt string such as "a bagel", tries to identify the object in the
provided image and returns a SegmentedGrayscale object in which the brighter
pixels indicate where the object is inferred to be.
'''
self._to_device(self.device)
prompts = [prompt] # right now we operate on just a single prompt at a time
transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
transforms.Resize((CLIPSEG_SIZE, CLIPSEG_SIZE)), # must be multiple of 64...
])
if type(image) is str:
image = Image.open(image).convert('RGB')
image = ImageOps.exif_transpose(image)
img = self._scale_and_crop(image)
img = transform(img).unsqueeze(0)
preds = self.model(img.repeat(len(prompts),1,1,1), prompts)[0]
heatmap = torch.sigmoid(preds[0][0]).cpu()
self._to_device('cpu')
return SegmentedGrayscale(image, heatmap)
def _to_device(self, device):
self.model.to(device)
def _scale_and_crop(self, image:Image)->Image:
scaled_image = Image.new('RGB',(CLIPSEG_SIZE,CLIPSEG_SIZE))
if image.width > image.height: # width is constraint
scale = CLIPSEG_SIZE / image.width
else:
scale = CLIPSEG_SIZE / image.height
scaled_image.paste(
image.resize(
(int(scale * image.width),
int(scale * image.height)
),
resample=Image.Resampling.LANCZOS
),box=(0,0)
)
return scaled_image

@ -106,7 +106,7 @@ class DDPM(pl.LightningModule):
], 'currently only supporting "eps" and "x0"'
self.parameterization = parameterization
print(
f'{self.__class__.__name__}: Running in {self.parameterization}-prediction mode'
f' | {self.__class__.__name__}: Running in {self.parameterization}-prediction mode'
)
self.cond_stage_model = None
self.clip_denoised = clip_denoised
@ -1353,7 +1353,7 @@ class LatentDiffusion(DDPM):
num_downs = self.first_stage_model.encoder.num_resolutions - 1
rescale_latent = 2 ** (num_downs)
# get top left postions of patches as conforming for the bbbox tokenizer, therefore we
# get top left positions of patches as required by the bbox tokenizer, therefore we
# need to rescale the tl patch coordinates to be in between (0,1)
tl_patch_coordinates = [
(

@ -98,7 +98,8 @@ class KSampler(Sampler):
rho=7.,
device=self.device,
)
self.sigmas = self.karras_sigmas
self.sigmas = self.model_sigmas
#self.sigmas = self.karras_sigmas
# ALERT: We are completely overriding the sample() method in the base class, which
# means that inpainting will not work. To get this to work we need to be able to

@ -140,7 +140,7 @@ class Sampler(object):
conditioning=None,
callback=None,
normals_sequence=None,
img_callback=None,
img_callback=None, # TODO: this is very confusing because it is called "step_callback" elsewhere. Change.
quantize_x0=False,
eta=0.0,
mask=None,

@ -49,9 +49,15 @@ class Upsample(nn.Module):
padding=1)
def forward(self, x):
cpu_m1_cond = True if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available() and \
x.size()[0] * x.size()[1] * x.size()[2] * x.size()[3] % 2**27 == 0 else False
if cpu_m1_cond:
x = x.to('cpu') # send to cpu
x = torch.nn.functional.interpolate(x, scale_factor=2.0, mode="nearest")
if self.with_conv:
x = self.conv(x)
if cpu_m1_cond:
x = x.to('mps') # return to mps
return x
@ -117,6 +123,14 @@ class ResnetBlock(nn.Module):
padding=0)
def forward(self, x, temb):
if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
x_size = x.size()
if (x_size[0] * x_size[1] * x_size[2] * x_size[3]) % 2**29 == 0:
self.to('cpu')
x = x.to('cpu')
else:
self.to('mps')
x = x.to('mps')
h = self.norm1(x)
h = silu(h)
h = self.conv1(h)
@ -245,7 +259,7 @@ class AttnBlock(nn.Module):
def make_attn(in_channels, attn_type="vanilla"):
assert attn_type in ["vanilla", "linear", "none"], f'attn_type {attn_type} unknown'
print(f"making attention of type '{attn_type}' with {in_channels} in_channels")
print(f" | Making attention of type '{attn_type}' with {in_channels} in_channels")
if attn_type == "vanilla":
return AttnBlock(in_channels)
elif attn_type == "none":
@ -521,7 +535,7 @@ class Decoder(nn.Module):
block_in = ch*ch_mult[self.num_resolutions-1]
curr_res = resolution // 2**(self.num_resolutions-1)
self.z_shape = (1,z_channels,curr_res,curr_res)
print("Working with z of shape {} = {} dimensions.".format(
print(" | Working with z of shape {} = {} dimensions.".format(
self.z_shape, np.prod(self.z_shape)))
# z to block_in

@ -64,7 +64,9 @@ def make_ddim_timesteps(
):
if ddim_discr_method == 'uniform':
c = num_ddpm_timesteps // num_ddim_timesteps
ddim_timesteps = np.asarray(list(range(0, num_ddpm_timesteps, c)))
if c < 1:
c = 1
ddim_timesteps = (np.arange(0, num_ddim_timesteps) * c).astype(int)
elif ddim_discr_method == 'quad':
ddim_timesteps = (
(
@ -81,8 +83,8 @@ def make_ddim_timesteps(
# assert ddim_timesteps.shape[0] == num_ddim_timesteps
# add one to get the final alpha values right (the ones from first scale to data during sampling)
# steps_out = ddim_timesteps + 1
steps_out = ddim_timesteps
steps_out = ddim_timesteps + 1
# steps_out = ddim_timesteps
if verbose:
print(f'Selected timesteps for ddim sampler: {steps_out}')
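# Worked example (illustrative numbers): with num_ddpm_timesteps=1000 and
# num_ddim_timesteps=50, c = 1000 // 50 = 20, so ddim_timesteps = [0, 20, ..., 980]
# and steps_out = [1, 21, ..., 981] after the +1 offset.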

@ -75,7 +75,7 @@ def count_params(model, verbose=False):
total_params = sum(p.numel() for p in model.parameters())
if verbose:
print(
f'{model.__class__.__name__} has {total_params * 1.e-6:.2f} M params.'
f' | {model.__class__.__name__} has {total_params * 1.e-6:.2f} M params.'
)
return total_params

main.py
@ -439,7 +439,7 @@ class ImageLogger(Callback):
self.rescale = rescale
self.batch_freq = batch_frequency
self.max_images = max_images
self.logger_log_images = { pl.loggers.TestTubeLogger: self._testtube, } if torch.cuda.is_available() else { }
self.logger_log_images = { }
self.log_steps = [
2**n for n in range(int(np.log2(self.batch_freq)) + 1)
]
@ -451,17 +451,6 @@ class ImageLogger(Callback):
self.log_images_kwargs = log_images_kwargs if log_images_kwargs else {}
self.log_first_step = log_first_step
@rank_zero_only
def _testtube(self, pl_module, images, batch_idx, split):
for k in images:
grid = torchvision.utils.make_grid(images[k])
grid = (grid + 1.0) / 2.0 # -1,1 -> 0,1; c,h,w
tag = f'{split}/{k}'
pl_module.logger.experiment.add_image(
tag, grid, global_step=pl_module.global_step
)
@rank_zero_only
def log_local(
self, save_dir, split, images, global_step, current_epoch, batch_idx
@ -714,7 +703,7 @@ if __name__ == '__main__':
# merge trainer cli with config
trainer_config = lightning_config.get('trainer', OmegaConf.create())
# default to ddp
trainer_config['accelerator'] = 'ddp'
trainer_config['accelerator'] = 'auto'
for k in nondefault_trainer_args(opt):
trainer_config[k] = getattr(opt, k)
if not 'gpus' in trainer_config:
@ -751,12 +740,8 @@ if __name__ == '__main__':
trainer_kwargs = dict()
# default logger configs
if torch.cuda.is_available():
def_logger = 'testtube'
def_logger_target = 'TestTubeLogger'
else:
def_logger = 'csv'
def_logger_target = 'CSVLogger'
def_logger = 'csv'
def_logger_target = 'CSVLogger'
default_logger_cfgs = {
'wandb': {
'target': 'pytorch_lightning.loggers.WandbLogger',
@ -918,7 +903,8 @@ if __name__ == '__main__':
config.model.base_learning_rate,
)
if not cpu:
ngpu = len(lightning_config.trainer.gpus.strip(',').split(','))
gpus = str(lightning_config.trainer.gpus).strip(', ').split(',')
ngpu = len(gpus)
else:
ngpu = 1
if 'accumulate_grad_batches' in lightning_config.trainer:

@ -6,7 +6,7 @@
"id": "ycYWcsEKc6w7"
},
"source": [
"# Stable Diffusion AI Notebook (Release 1.14)\n",
"# Stable Diffusion AI Notebook (Release 2.0.0)\n",
"\n",
"<img src=\"https://user-images.githubusercontent.com/60411196/186547976-d9de378a-9de8-4201-9c25-c057a9c59bad.jpeg\" alt=\"stable-diffusion-ai\" width=\"170px\"/> <br>\n",
"#### Instructions:\n",
@ -58,8 +58,8 @@
"from os.path import exists\n",
"\n",
"!git clone --quiet https://github.com/invoke-ai/InvokeAI.git # Original repo\n",
"%cd /content/stable-diffusion/\n",
"!git checkout --quiet tags/release-1.14.1"
"%cd /content/InvokeAI/\n",
"!git checkout --quiet tags/v2.0.0"
]
},
{
@ -79,6 +79,7 @@
"!pip install colab-xterm\n",
"!pip install -r requirements-lin-win-colab-CUDA.txt\n",
"!pip install clean-fid torchtext\n",
"!pip install transformers\n",
"gc.collect()"
]
},
@ -106,7 +107,7 @@
"source": [
"#@title 5. Load small ML models required\n",
"import gc\n",
"%cd /content/stable-diffusion/\n",
"%cd /content/InvokeAI/\n",
"!python scripts/preload_models.py\n",
"gc.collect()"
]
@ -171,18 +172,18 @@
"import os \n",
"\n",
"# Folder creation if it doesn't exist\n",
"if exists(\"/content/stable-diffusion/models/ldm/stable-diffusion-v1\"):\n",
"if exists(\"/content/InvokeAI/models/ldm/stable-diffusion-v1\"):\n",
" print(\"❗ Dir stable-diffusion-v1 already exists\")\n",
"else:\n",
" %mkdir /content/stable-diffusion/models/ldm/stable-diffusion-v1\n",
" %mkdir /content/InvokeAI/models/ldm/stable-diffusion-v1\n",
" print(\"✅ Dir stable-diffusion-v1 created\")\n",
"\n",
"# Symbolic link if it doesn't exist\n",
"if exists(\"/content/stable-diffusion/models/ldm/stable-diffusion-v1/model.ckpt\"):\n",
"if exists(\"/content/InvokeAI/models/ldm/stable-diffusion-v1/model.ckpt\"):\n",
" print(\"❗ Symlink already created\")\n",
"else: \n",
" src = model_path\n",
" dst = '/content/stable-diffusion/models/ldm/stable-diffusion-v1/model.ckpt'\n",
" dst = '/content/InvokeAI/models/ldm/stable-diffusion-v1/model.ckpt'\n",
" os.symlink(src, dst) \n",
" print(\"✅ Symbolic link created successfully\")"
]
@ -207,7 +208,7 @@
"source": [
"#@title 9. Run Terminal and Execute Dream bot\n",
"#@markdown <font color=\"blue\">Steps:</font> <br>\n",
"#@markdown 1. Execute command `python scripts/dream.py` to run dream bot.<br>\n",
"#@markdown 1. Execute command `python scripts/invoke.py` to run InvokeAI.<br>\n",
"#@markdown 2. After initialized you'll see `Dream>` line.<br>\n",
"#@markdown 3. Example text: `Astronaut floating in a distant galaxy` <br>\n",
"#@markdown 4. To quit Dream bot use: `q` command.<br>\n",
@ -233,7 +234,7 @@
"%matplotlib inline\n",
"\n",
"images = []\n",
"for img_path in sorted(glob.glob('/content/stable-diffusion/outputs/img-samples/*.png'), reverse=True):\n",
"for img_path in sorted(glob.glob('/content/InvokeAI/outputs/img-samples/*.png'), reverse=True):\n",
" images.append(mpimg.imread(img_path))\n",
"\n",
"images = images[:15] \n",

@ -1,5 +1,6 @@
albumentations==0.4.3
einops==0.3.0
diffusers==0.6.0
huggingface-hub==0.8.1
imageio==2.9.0
imageio-ffmpeg==0.4.2
@ -12,14 +13,15 @@ pillow==9.2.0
pudb==2019.2
torch==1.12.1
torchvision==0.13.0
pytorch-lightning==1.4.2
pytorch-lightning==1.7.7
streamlit==1.12.0
test-tube>=0.7.5
torch-fidelity==0.3.0
torchmetrics==0.6.0
transformers==4.19.2
transformers==4.21.3
-e git+https://github.com/openai/CLIP.git@main#egg=clip
-e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers
-e git+https://github.com/lstein/k-diffusion.git@master#egg=k-diffusion
-e git+https://github.com/TencentARC/GFPGAN.git#egg=gfpgan
-e git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg
-e .

@ -1,6 +1,6 @@
-r requirements.txt
protobuf==3.19.4
protobuf==3.19.6
torch
torchvision
-e .

@ -32,6 +32,8 @@ send2trash
dependency_injector==4.40.0
eventlet
realesrgan
diffusers
git+https://github.com/openai/CLIP.git@main#egg=clip
git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k-diffusion
git+https://github.com/TencentARC/GFPGAN.git#egg=gfpgan
-e git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg

@ -1,12 +1,11 @@
#!/usr/bin/env python3
# Copyright (c) 2022 Lincoln D. Stein (https://github.com/lstein)
import sys
import os.path
script_path = sys.argv[0]
script_args = sys.argv[1:]
script_dir,script_name = os.path.split(script_path)
script_dest = os.path.join(script_dir,'invoke.py')
os.execlp('python3','python3',script_dest,*script_args)
import warnings
import invoke
if __name__ == '__main__':
warnings.warn("dream.py is being deprecated, please run invoke.py for the "
"new UI/API or legacy_api.py for the old API",
DeprecationWarning)
invoke.main()

@ -9,6 +9,7 @@ import copy
import warnings
import time
import traceback
import yaml
sys.path.append('.') # corrects a weird problem on Macs
from ldm.invoke.readline import get_completer
from ldm.invoke.args import Args, metadata_dumps, metadata_from_png, dream_cmd_from_png
@ -16,11 +17,15 @@ from ldm.invoke.pngwriter import PngWriter, retrieve_metadata, write_metadata
from ldm.invoke.image_util import make_grid
from ldm.invoke.log import write_log
from omegaconf import OmegaConf
from backend.invoke_ai_web_server import InvokeAIWebServer
from pathlib import Path
# global used in multiple functions (fix)
infile = None
def main():
"""Initialize command-line parsers and the diffusion model"""
global infile
opt = Args()
args = opt.parse_args()
if not args:
@ -33,7 +38,7 @@ def main():
print('--weights argument has been deprecated. Please edit ./configs/models.yaml, and select the weights using --model instead.')
sys.exit(-1)
print('* Initializing, be patient...\n')
print('* Initializing, be patient...')
from ldm.generate import Generate
# these two lines prevent a horrible warning message from appearing
@ -42,52 +47,13 @@ def main():
transformers.logging.set_verbosity_error()
# Loading Face Restoration and ESRGAN Modules
try:
gfpgan, codeformer, esrgan = None, None, None
if opt.restore or opt.esrgan:
from ldm.invoke.restoration import Restoration
restoration = Restoration()
if opt.restore:
gfpgan, codeformer = restoration.load_face_restore_models(opt.gfpgan_dir, opt.gfpgan_model_path)
else:
print('>> Face restoration disabled')
if opt.esrgan:
esrgan = restoration.load_esrgan(opt.esrgan_bg_tile)
else:
print('>> Upscaling disabled')
else:
print('>> Face restoration and upscaling disabled')
except (ModuleNotFoundError, ImportError):
print(traceback.format_exc(), file=sys.stderr)
print('>> You may need to install the ESRGAN and/or GFPGAN modules')
# creating a simple text2image object with a handful of
# defaults passed on the command line.
# additional parameters will be added (or overriden) during
# the user input loop
try:
gen = Generate(
conf = opt.conf,
model = opt.model,
sampler_name = opt.sampler_name,
embedding_path = opt.embedding_path,
full_precision = opt.full_precision,
precision = opt.precision,
gfpgan=gfpgan,
codeformer=codeformer,
esrgan=esrgan,
free_gpu_mem=opt.free_gpu_mem,
)
except (FileNotFoundError, IOError, KeyError) as e:
print(f'{e}. Aborting.')
sys.exit(-1)
gfpgan,codeformer,esrgan = load_face_restoration(opt)
# make sure the output directory exists
if not os.path.exists(opt.outdir):
os.makedirs(opt.outdir)
# load the infile as a list of lines
infile = None
if opt.infile:
try:
if os.path.isfile(opt.infile):
@ -100,6 +66,25 @@ def main():
print(f'{e}. Aborting.')
sys.exit(-1)
# creating a Generate object:
try:
gen = Generate(
conf = opt.conf,
model = opt.model,
sampler_name = opt.sampler_name,
embedding_path = opt.embedding_path,
full_precision = opt.full_precision,
precision = opt.precision,
gfpgan=gfpgan,
codeformer=codeformer,
esrgan=esrgan,
free_gpu_mem=opt.free_gpu_mem,
safety_checker=opt.safety_checker,
)
except (FileNotFoundError, IOError, KeyError) as e:
print(f'{e}. Aborting.')
sys.exit(-1)
if opt.seamless:
print(">> changed to seamless tiling mode")
@ -116,20 +101,26 @@ def main():
"\n* Initialization done! Awaiting your command (-h for help, 'q' to quit)"
)
main_loop(gen, opt, infile)
try:
main_loop(gen, opt)
except KeyboardInterrupt:
print("\ngoodbye!")
# TODO: main_loop() has gotten busy. Needs to be refactored.
def main_loop(gen, opt, infile):
def main_loop(gen, opt):
"""prompt/read/execute loop"""
global infile
done = False
doneAfterInFile = infile is not None
path_filter = re.compile(r'[<>:"/\\|?*]')
last_results = list()
model_config = OmegaConf.load(opt.conf)[opt.model]
model_config = OmegaConf.load(opt.conf)
# The readline completer reads history from the .dream_history file located in the
# output directory specified at the time of script launch. We do not currently support
# changing the history file midstream when the output directory is changed.
completer = get_completer(opt)
completer = get_completer(opt, models=list(model_config.keys()))
completer.set_default_dir(opt.outdir)
output_cntr = completer.get_current_history_length()+1
# os.pathconf is not available on Windows
@ -141,15 +132,14 @@ def main_loop(gen, opt, infile):
name_max = 255
while not done:
operation = 'generate' # default operation, alternative is 'postprocess'
if completer:
completer.set_default_dir(opt.outdir)
operation = 'generate'
try:
command = get_next_command(infile)
except EOFError:
done = True
done = infile is None or doneAfterInFile
infile = None
continue
# skip empty lines
@ -164,41 +154,10 @@ def main_loop(gen, opt, infile):
break
if command.startswith('!'):
subcommand = command[1:]
command, operation = do_command(command, gen, opt, completer)
if subcommand.startswith('dream'): # in case a stored prompt still contains the !dream command
command = command.replace('!dream ','',1)
elif subcommand.startswith('fix'):
command = command.replace('!fix ','',1)
operation = 'postprocess'
elif subcommand.startswith('fetch'):
file_path = command.replace('!fetch ','',1)
retrieve_dream_command(opt,file_path,completer)
continue
elif subcommand.startswith('history'):
completer.show_history()
continue
elif subcommand.startswith('search'):
search_str = command.replace('!search ','',1)
completer.show_history(search_str)
continue
elif subcommand.startswith('clear'):
completer.clear_history()
continue
elif re.match('^(\d+)',subcommand):
command_no = re.match('^(\d+)',subcommand).groups()[0]
command = completer.get_line(int(command_no))
completer.set_line(command)
continue
else: # not a recognized subcommand, so give the --help text
command = '-h'
if operation is None:
continue
if opt.parse_cmd(command) is None:
continue
@ -218,9 +177,9 @@ def main_loop(gen, opt, infile):
# width and height are set by model if not specified
if not opt.width:
opt.width = model_config.width
opt.width = gen.width
if not opt.height:
opt.height = model_config.height
opt.height = gen.height
# retrieve previous value of init image if requested
if opt.init_img is not None and re.match('^-\\d+$', opt.init_img):
@ -275,9 +234,13 @@ def main_loop(gen, opt, infile):
os.makedirs(opt.outdir)
current_outdir = opt.outdir
# write out the history at this point
# Write out the history at this point.
# TODO: Fix the parsing of command-line parameters
# so that !operations don't need to be stripped and readded
if operation == 'postprocess':
completer.add_history(f'!fix {command}')
elif operation == 'mask':
completer.add_history(f'!mask {command}')
else:
completer.add_history(command)
@ -289,6 +252,7 @@ def main_loop(gen, opt, infile):
grid_images = dict() # seed -> Image, only used if `opt.grid`
prior_variations = opt.with_variations or []
prefix = file_writer.unique_prefix()
step_callback = make_step_callback(gen, opt, prefix) if opt.save_intermediates > 0 else None
def image_writer(image, seed, upscaled=False, first_seed=None, use_prefix=None):
# note the seed is the seed of the current image
@ -296,13 +260,28 @@ def main_loop(gen, opt, infile):
# when the -v switch is used to generate variations
nonlocal prior_variations
nonlocal prefix
if use_prefix is not None:
prefix = use_prefix
path = None
if opt.grid:
grid_images[seed] = image
elif operation == 'mask':
filename = f'{prefix}.{use_prefix}.{seed}.png'
tm = opt.text_mask[0]
th = opt.text_mask[1] if len(opt.text_mask)>1 else 0.5
formatted_dream_prompt = f'!mask {opt.prompt} -tm {tm} {th}'
path = file_writer.save_image_and_prompt_to_png(
image = image,
dream_prompt = formatted_dream_prompt,
metadata = {},
name = filename,
compress_level = opt.png_compression,
)
results.append([path, formatted_dream_prompt])
else:
if use_prefix is not None:
prefix = use_prefix
postprocessed = upscaled if upscaled else operation=='postprocess'
filename, formatted_dream_prompt = prepare_image_metadata(
opt,
@ -322,6 +301,7 @@ def main_loop(gen, opt, infile):
model_hash = gen.model_hash,
),
name = filename,
compress_level = opt.png_compression,
)
# update rfc metadata
@ -340,7 +320,7 @@ def main_loop(gen, opt, infile):
results.append([path, formatted_dream_prompt])
# so that the seed autocompletes (on linux|mac when -S or --seed specified
if completer:
if completer and operation == 'generate':
completer.add_seed(seed)
completer.add_seed(first_seed)
last_results.append([path, seed])
@ -350,6 +330,7 @@ def main_loop(gen, opt, infile):
opt.last_operation='generate'
gen.prompt2image(
image_callback=image_writer,
step_callback=step_callback,
catch_interrupts=catch_ctrl_c,
**vars(opt)
)
@ -357,6 +338,10 @@ def main_loop(gen, opt, infile):
print(f'>> fixing {opt.prompt}')
opt.last_operation = do_postprocess(gen,opt,image_writer)
elif operation == 'mask':
print(f'>> generating masks from {opt.prompt}')
do_textmask(gen, opt, image_writer)
if opt.grid and len(grid_images) > 0:
grid_img = make_grid(list(grid_images.values()))
grid_seeds = list(grid_images.keys())
@ -392,13 +377,239 @@ def main_loop(gen, opt, infile):
print('goodbye!')
# TO DO: remove repetitive code and the awkward command.replace() trope
# Just do a simple parse of the command!
def do_command(command:str, gen, opt:Args, completer) -> tuple:
global infile
operation = 'generate' # default operation, alternative is 'postprocess'
if command.startswith('!dream'): # in case a stored prompt still contains the !dream command
command = command.replace('!dream ','',1)
elif command.startswith('!fix'):
command = command.replace('!fix ','',1)
operation = 'postprocess'
elif command.startswith('!mask'):
command = command.replace('!mask ','',1)
operation = 'mask'
elif command.startswith('!switch'):
model_name = command.replace('!switch ','',1)
gen.set_model(model_name)
completer.add_history(command)
operation = None
elif command.startswith('!models'):
gen.model_cache.print_models()
completer.add_history(command)
operation = None
elif command.startswith('!import'):
path = shlex.split(command)
if len(path) < 2:
print('** please provide a path to a .ckpt or .vae model file')
elif not os.path.exists(path[1]):
print(f'** {path[1]}: file not found')
else:
add_weights_to_config(path[1], gen, opt, completer)
completer.add_history(command)
operation = None
elif command.startswith('!edit'):
path = shlex.split(command)
if len(path) < 2:
print('** please provide the name of a model')
else:
edit_config(path[1], gen, opt, completer)
completer.add_history(command)
operation = None
elif command.startswith('!del'):
path = shlex.split(command)
if len(path) < 2:
print('** please provide the name of a model')
else:
del_config(path[1], gen, opt, completer)
completer.add_history(command)
operation = None
elif command.startswith('!fetch'):
file_path = command.replace('!fetch','',1).strip()
retrieve_dream_command(opt,file_path,completer)
completer.add_history(command)
operation = None
elif command.startswith('!replay'):
file_path = command.replace('!replay','',1).strip()
if infile is None and os.path.isfile(file_path):
infile = open(file_path, 'r', encoding='utf-8')
completer.add_history(command)
operation = None
elif command.startswith('!history'):
completer.show_history()
operation = None
elif command.startswith('!search'):
search_str = command.replace('!search','',1).strip()
completer.show_history(search_str)
operation = None
elif command.startswith('!clear'):
completer.clear_history()
operation = None
elif re.match('^!(\d+)',command):
command_no = re.match('^!(\d+)',command).groups()[0]
command = completer.get_line(int(command_no))
completer.set_line(command)
operation = None
else: # not a recognized command, so give the --help text
command = '-h'
return command, operation
def add_weights_to_config(model_path:str, gen, opt, completer):
print(f'>> Model import in progress. Please enter the values needed to configure this model:')
print()
new_config = {}
new_config['weights'] = model_path
done = False
while not done:
model_name = input('Short name for this model: ')
if not re.match('^[\w._-]+$',model_name):
print('** model name must contain only words, digits and the characters [._-] **')
else:
done = True
new_config['description'] = input('Description of this model: ')
completer.complete_extensions(('.yaml','.yml'))
completer.linebuffer = 'configs/stable-diffusion/v1-inference.yaml'
done = False
while not done:
new_config['config'] = input('Configuration file for this model: ')
done = os.path.exists(new_config['config'])
done = False
completer.complete_extensions(('.vae.pt','.vae','.ckpt'))
while not done:
vae = input('VAE autoencoder file for this model [None]: ')
if os.path.exists(vae):
new_config['vae'] = vae
done = True
else:
done = len(vae)==0
completer.complete_extensions(None)
for field in ('width','height'):
done = False
while not done:
try:
completer.linebuffer = '512'
value = int(input(f'Default image {field}: '))
assert value >= 64 and value <= 2048
new_config[field] = value
done = True
except:
print('** Please enter a valid integer between 64 and 2048')
make_default = input('Make this the default model? [n] ') in ('y','Y')
if write_config_file(opt.conf, gen, model_name, new_config, make_default=make_default):
completer.add_model(model_name)
def del_config(model_name:str, gen, opt, completer):
current_model = gen.model_name
if model_name == current_model:
print("** Can't delete active model. !switch to another model first. **")
return
yaml_str = gen.model_cache.del_model(model_name)
tmpfile = os.path.join(os.path.dirname(opt.conf),'new_config.tmp')
with open(tmpfile, 'w') as outfile:
outfile.write(yaml_str)
os.rename(tmpfile,opt.conf)
print(f'** {model_name} deleted')
completer.del_model(model_name)
def edit_config(model_name:str, gen, opt, completer):
config = gen.model_cache.config
if model_name not in config:
print(f'** Unknown model {model_name}')
return
print(f'\n>> Editing model {model_name} from configuration file {opt.conf}')
conf = config[model_name]
new_config = {}
completer.complete_extensions(('.yaml','.yml','.ckpt','.vae.pt'))
for field in ('description', 'weights', 'vae', 'config', 'width','height'):
completer.linebuffer = str(conf[field]) if field in conf else ''
new_value = input(f'{field}: ')
new_config[field] = int(new_value) if field in ('width','height') else new_value
make_default = input('Make this the default model? [n] ') in ('y','Y')
completer.complete_extensions(None)
write_config_file(opt.conf, gen, model_name, new_config, clobber=True, make_default=make_default)
def write_config_file(conf_path, gen, model_name, new_config, clobber=False, make_default=False):
current_model = gen.model_name
op = 'modify' if clobber else 'import'
print('\n>> New configuration:')
if make_default:
new_config['default'] = True
print(yaml.dump({model_name:new_config}))
if input(f'OK to {op} [n]? ') not in ('y','Y'):
return False
try:
print('>> Verifying that new model loads...')
yaml_str = gen.model_cache.add_model(model_name, new_config, clobber)
assert gen.set_model(model_name) is not None, 'model failed to load'
except AssertionError as e:
print(f'** aborting **')
gen.model_cache.del_model(model_name)
return False
if make_default:
print('making this default')
gen.model_cache.set_default_model(model_name)
gen.model_cache.commit(conf_path)
do_switch = input(f'Keep model loaded? [y]')
if len(do_switch)==0 or do_switch[0] in ('y','Y'):
pass
else:
gen.set_model(current_model)
return True
def do_textmask(gen, opt, callback):
image_path = opt.prompt
assert os.path.exists(image_path), f'** "{image_path}" not found. Please enter the name of an existing image file to mask **'
assert opt.text_mask is not None and len(opt.text_mask) >= 1, '** Please provide a text mask with -tm **'
tm = opt.text_mask[0]
threshold = float(opt.text_mask[1]) if len(opt.text_mask) > 1 else 0.5
gen.apply_textmask(
image_path = image_path,
prompt = tm,
threshold = threshold,
callback = callback,
)
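# Example invocation from the interactive prompt (the file name is illustrative):
#   !mask outputs/img-samples/000001.123456.png -tm 'a bagel' 0.5
# The image path arrives in opt.prompt, while the text prompt and optional
# threshold are supplied via --text_mask/-tm.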
def do_postprocess (gen, opt, callback):
file_path = opt.prompt # treat the prompt as the file pathname
if os.path.dirname(file_path) == '': #basename given
file_path = os.path.join(opt.outdir,file_path)
tool=None
if opt.gfpgan_strength > 0:
if opt.facetool_strength > 0:
tool = opt.facetool
elif opt.embiggen:
tool = 'embiggen'
@ -414,7 +625,7 @@ def do_postprocess (gen, opt, callback):
gen.apply_postprocessor(
image_path = file_path,
tool = tool,
gfpgan_strength = opt.gfpgan_strength,
facetool_strength = opt.facetool_strength,
codeformer_fidelity = opt.codeformer_fidelity,
save_original = opt.save_original,
upscale = opt.upscale,
@ -436,7 +647,10 @@ def add_postprocessing_to_metadata(opt,original_file,new_file,tool,command):
original_file = original_file if os.path.exists(original_file) else os.path.join(opt.outdir,original_file)
new_file = new_file if os.path.exists(new_file) else os.path.join(opt.outdir,new_file)
meta = retrieve_metadata(original_file)['sd-metadata']
img_data = meta['image']
if 'image' not in meta:
meta = metadata_dumps(opt,seeds=[opt.seed])['image']
meta['image'] = {}
img_data = meta.get('image')
pp = img_data.get('postprocessing',[]) or []
pp.append(
{
@ -460,7 +674,17 @@ def prepare_image_metadata(
if postprocessed and opt.save_original:
filename = choose_postprocess_name(opt,prefix,seed)
else:
filename = f'{prefix}.{seed}.png'
wildcards = dict(opt.__dict__)
wildcards['prefix'] = prefix
wildcards['seed'] = seed
try:
filename = opt.fnformat.format(**wildcards)
except KeyError as e:
print(f'** The filename format contains an unknown key \'{e.args[0]}\'. Will use \'{{prefix}}.{{seed}}.png\' instead')
filename = f'{prefix}.{seed}.png'
except IndexError as e:
print(f'** The filename format is broken or incomplete. Will use \'{{prefix}}.{{seed}}.png\' instead')
filename = f'{prefix}.{seed}.png'
if opt.variation_amount > 0:
first_seed = first_seed or seed
@ -509,6 +733,7 @@ def get_next_command(infile=None) -> str: # command string
def invoke_ai_web_server_loop(gen, gfpgan, codeformer, esrgan):
print('\n* --web was specified, starting web server...')
from backend.invoke_ai_web_server import InvokeAIWebServer
# Change working directory to the stable-diffusion directory
os.chdir(
os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
@ -547,27 +772,103 @@ def split_variations(variations_string) -> list:
else:
return parts
def retrieve_dream_command(opt,file_path,completer):
def load_face_restoration(opt):
try:
gfpgan, codeformer, esrgan = None, None, None
if opt.restore or opt.esrgan:
from ldm.invoke.restoration import Restoration
restoration = Restoration()
if opt.restore:
gfpgan, codeformer = restoration.load_face_restore_models(opt.gfpgan_dir, opt.gfpgan_model_path)
else:
print('>> Face restoration disabled')
if opt.esrgan:
esrgan = restoration.load_esrgan(opt.esrgan_bg_tile)
else:
print('>> Upscaling disabled')
else:
print('>> Face restoration and upscaling disabled')
except (ModuleNotFoundError, ImportError):
print(traceback.format_exc(), file=sys.stderr)
print('>> You may need to install the ESRGAN and/or GFPGAN modules')
return gfpgan,codeformer,esrgan
def make_step_callback(gen, opt, prefix):
destination = os.path.join(opt.outdir,'intermediates',prefix)
os.makedirs(destination,exist_ok=True)
print(f'>> Intermediate images will be written into {destination}')
def callback(img, step):
if step % opt.save_intermediates == 0 or step == opt.steps-1:
filename = os.path.join(destination,f'{step:04}.png')
image = gen.sample_to_image(img)
image.save(filename,'PNG')
return callback
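# Sketch of the expected behaviour (assumed option values): with --save_intermediates 5
# and --steps 50, every 5th latent plus the final step is decoded and written to
# <outdir>/intermediates/<prefix>/ as 0000.png, 0005.png, ..., 0045.png and 0049.png.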
def retrieve_dream_command(opt,command,completer):
'''
Given a full or partial path to a previously-generated image file,
will retrieve and format the dream command used to generate the image,
and pop it into the readline buffer (linux, Mac), or print out a comment
for cut-and-paste (windows)
Given a wildcard path to a folder with image png files,
will retrieve and format the dream command used to generate the images,
and save them to a file commands.txt for further processing
'''
dir,basename = os.path.split(file_path)
if len(command) == 0:
return
tokens = command.split()
dir,basename = os.path.split(tokens[0])
if len(dir) == 0:
path = os.path.join(opt.outdir,basename)
else:
path = file_path
path = tokens[0]
if len(tokens) > 1:
return write_commands(opt, path, tokens[1])
cmd = ''
try:
cmd = dream_cmd_from_png(path)
except OSError:
print(f'** {path}: file could not be read')
print(f'## {tokens[0]}: file could not be read')
except (KeyError, AttributeError, IndexError):
print(f'## {tokens[0]}: file has no metadata')
except:
print(f'## {tokens[0]}: file could not be processed')
if len(cmd)>0:
completer.set_line(cmd)
def write_commands(opt, file_path:str, outfilepath:str):
dir,basename = os.path.split(file_path)
try:
paths = list(Path(dir).glob(basename))
except ValueError:
print(f'## "{basename}": unacceptable pattern')
return
except (KeyError, AttributeError):
print(f'** {path}: file has no metadata')
return
completer.set_line(cmd)
commands = []
cmd = None
for path in paths:
try:
cmd = dream_cmd_from_png(path)
except (KeyError, AttributeError, IndexError):
print(f'## {path}: file has no metadata')
except:
print(f'## {path}: file could not be processed')
if cmd:
commands.append(f'# {path}')
commands.append(cmd)
if len(commands)>0:
dir,basename = os.path.split(outfilepath)
if len(dir)==0:
outfilepath = os.path.join(opt.outdir,basename)
with open(outfilepath, 'w', encoding='utf-8') as f:
f.write('\n'.join(commands))
print(f'>> File {outfilepath} with commands created')
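# Illustrative round trip (paths are assumptions): collect the generation commands
# for every PNG in a folder, then replay them in a later session.
#   !fetch outputs/img-samples/*.png commands.txt
#   !replay commands.txt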
######################################
if __name__ == '__main__':
main()

@ -5,50 +5,51 @@
# two machines must share a common .cache directory.
from transformers import CLIPTokenizer, CLIPTextModel
import clip
from transformers import BertTokenizerFast
from transformers import BertTokenizerFast, AutoFeatureExtractor
import sys
import transformers
import os
import warnings
import torch
import urllib.request
import zipfile
import traceback
transformers.logging.set_verbosity_error()
#---------------------------------------------
# this will preload the Bert tokenizer files
print('preloading bert tokenizer...', end='')
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
print('...success')
def download_bert():
print('Installing bert tokenizer (ignore deprecation errors)...', end='')
with warnings.catch_warnings():
warnings.filterwarnings('ignore', category=DeprecationWarning)
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
print('...success')
sys.stdout.flush()
#---------------------------------------------
# this will download requirements for Kornia
print('preloading Kornia requirements...', end='')
with warnings.catch_warnings():
warnings.filterwarnings('ignore', category=DeprecationWarning)
import kornia
print('...success')
def download_kornia():
print('Installing Kornia requirements...', end='')
with warnings.catch_warnings():
warnings.filterwarnings('ignore', category=DeprecationWarning)
import kornia
print('...success')
version = 'openai/clip-vit-large-patch14'
#---------------------------------------------
def download_clip():
version = 'openai/clip-vit-large-patch14'
sys.stdout.flush()
print('Loading CLIP model...',end='')
tokenizer = CLIPTokenizer.from_pretrained(version)
transformer = CLIPTextModel.from_pretrained(version)
print('...success')
print('preloading CLIP model...',end='')
sys.stdout.flush()
tokenizer = CLIPTokenizer.from_pretrained(version)
transformer = CLIPTextModel.from_pretrained(version)
print('...success')
# In the event that the user has installed GFPGAN and also elected to use
# RealESRGAN, this will attempt to download the model needed by RealESRGANer
gfpgan = False
try:
from realesrgan import RealESRGANer
gfpgan = True
except ModuleNotFoundError:
pass
if gfpgan:
print('Loading models from RealESRGAN and facexlib...',end='')
#---------------------------------------------
def download_gfpgan():
print('Installing models from RealESRGAN and facexlib...',end='')
try:
from realesrgan import RealESRGANer
from realesrgan.archs.srvgg_arch import SRVGGNetCompact
from facexlib.utils.face_restoration_helper import FaceRestoreHelper
@ -61,7 +62,6 @@ if gfpgan:
FaceRestoreHelper(1, det_model='retinaface_resnet50')
print('...success')
except Exception:
import traceback
print('Error loading ESRGAN:')
print(traceback.format_exc())
@ -89,21 +89,75 @@ if gfpgan:
urllib.request.urlretrieve(model_url,model_dest)
print('...success')
except Exception:
import traceback
print('Error loading GFPGAN:')
print(traceback.format_exc())
print('preloading CodeFormer model file...',end='')
try:
import urllib.request
model_url = 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/codeformer.pth'
model_dest = 'ldm/invoke/restoration/codeformer/weights/codeformer.pth'
if not os.path.exists(model_dest):
print('Downloading codeformer model file...')
#---------------------------------------------
def download_codeformer():
print('Installing CodeFormer model file...',end='')
try:
model_url = 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/codeformer.pth'
model_dest = 'ldm/invoke/restoration/codeformer/weights/codeformer.pth'
if not os.path.exists(model_dest):
print('Downloading codeformer model file...')
os.makedirs(os.path.dirname(model_dest), exist_ok=True)
urllib.request.urlretrieve(model_url,model_dest)
except Exception:
print('Error loading CodeFormer:')
print(traceback.format_exc())
print('...success')
#---------------------------------------------
def download_clipseg():
    print('Installing clipseg model for text-based masking...',end='')
    try:
        model_url = 'https://owncloud.gwdg.de/index.php/s/ioHbRzFx6th32hn/download'
        model_dest = 'src/clipseg/clipseg_weights.zip'
        weights_dir = 'src/clipseg/weights'
        if not os.path.exists(weights_dir):
            os.makedirs(os.path.dirname(model_dest), exist_ok=True)
            urllib.request.urlretrieve(model_url,model_dest)
except Exception:
    import traceback
    print('Error loading CodeFormer:')
    print(traceback.format_exc())
print('...success')
            with zipfile.ZipFile(model_dest,'r') as zip:
                zip.extractall('src/clipseg')
                os.rename('src/clipseg/clipseg_weights','src/clipseg/weights')
            os.remove(model_dest)
            from clipseg_models.clipseg import CLIPDensePredT
            model = CLIPDensePredT(version='ViT-B/16', reduce_dim=64, )
            model.eval()
            model.load_state_dict(
                torch.load(
                    'src/clipseg/weights/rd64-uni-refined.pth',
                    map_location=torch.device('cpu')
                ),
                strict=False,
            )
    except Exception:
        print('Error installing clipseg model:')
        print(traceback.format_exc())
    print('...success')
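# --- Editor's note: hedged usage sketch, not part of this diff. Once
# download_clipseg() has unpacked the weights, text-based masking typically
# goes through the upstream clipseg API roughly as below. The image path and
# prompt are placeholders, and the CLIPDensePredT call signature is assumed
# from the upstream clipseg project rather than confirmed by this repository. ---
import torch
from PIL import Image
from torchvision import transforms
from clipseg_models.clipseg import CLIPDensePredT

clipseg_model = CLIPDensePredT(version='ViT-B/16', reduce_dim=64)
clipseg_model.eval()
clipseg_model.load_state_dict(
    torch.load('src/clipseg/weights/rd64-uni-refined.pth',
               map_location=torch.device('cpu')),
    strict=False,
)
preprocess = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
    transforms.Resize((352, 352)),
])
image = preprocess(Image.open('photo.png').convert('RGB')).unsqueeze(0)  # placeholder input
with torch.no_grad():
    preds = clipseg_model(image, ['a face'])[0]   # logits for the single prompt
mask = torch.sigmoid(preds[0][0])                 # HxW soft mask in [0, 1]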
#-------------------------------------
def download_safety_checker():
    print('Installing safety model for NSFW content detection...',end='')
    try:
        from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
    except ModuleNotFoundError:
        print('Error installing safety checker model:')
        print(traceback.format_exc())
        return
    safety_model_id = "CompVis/stable-diffusion-safety-checker"
    safety_feature_extractor = AutoFeatureExtractor.from_pretrained(safety_model_id)
    safety_checker = StableDiffusionSafetyChecker.from_pretrained(safety_model_id)
    print('...success')
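# --- Editor's note: hedged usage sketch, not part of this diff. After
# download_safety_checker() has cached the model, generated images can be
# screened roughly as follows; 'result.png' is a placeholder, and the keyword
# arguments follow the diffusers StableDiffusionSafetyChecker interface as the
# editor understands it, so treat them as an assumption. ---
import numpy as np
from PIL import Image
from transformers import AutoFeatureExtractor
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker

safety_model_id = "CompVis/stable-diffusion-safety-checker"
feature_extractor = AutoFeatureExtractor.from_pretrained(safety_model_id)
checker = StableDiffusionSafetyChecker.from_pretrained(safety_model_id)

candidate = Image.open('result.png').convert('RGB')   # placeholder image
clip_input = feature_extractor([candidate], return_tensors="pt").pixel_values
checked, has_nsfw = checker(images=[np.array(candidate)], clip_input=clip_input)
if has_nsfw[0]:
    print('NSFW content detected; the checker blanks flagged images')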
#-------------------------------------
if __name__ == '__main__':
    download_bert()
    download_kornia()
    download_clip()
    download_gfpgan()
    download_codeformer()
    download_clipseg()
    download_safety_checker()
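# Editor's note: typical invocation after the conda environment is set up
# (the same command appears in the nix shell comments later in this diff):
#     python3 scripts/preload_models.py
# which runs each of the download_*() steps above in order.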

View File

@ -35,13 +35,14 @@ class DreamBase():
    perlin: float = 0.0
    sampler_name: string = 'klms'
    seamless: bool = False
    hires_fix: bool = False
    model: str = None # The model to use (currently unused)
    embeddings = None # The embeddings to use (currently unused)
    progress_images: bool = False
    # GFPGAN
    enable_gfpgan: bool
    gfpgan_strength: float = 0
    facetool_strength: float = 0
    # Upscale
    enable_upscale: bool
@ -91,12 +92,13 @@ class DreamBase():
        # model: str = None # The model to use (currently unused)
        # embeddings = None # The embeddings to use (currently unused)
        self.seamless = 'seamless' in j
        self.hires_fix = 'hires_fix' in j
        self.progress_images = 'progress_images' in j
        # GFPGAN
        self.enable_gfpgan = 'enable_gfpgan' in j and bool(j.get('enable_gfpgan'))
        if self.enable_gfpgan:
            self.gfpgan_strength = float(j.get('gfpgan_strength'))
            self.facetool_strength = float(j.get('facetool_strength'))
        # Upscale
        self.enable_upscale = 'enable_upscale' in j and bool(j.get('enable_upscale'))
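# --- Editor's note: hedged illustration, not part of this diff. A request
# dict of the shape the parsing above expects after the rename; values are
# placeholders, and only keys visible in the surrounding hunks are assumed. ---
j = {
    'seamless': True,
    'hires_fix': True,
    'progress_images': False,
    'enable_gfpgan': True,
    'facetool_strength': 0.8,   # formerly 'gfpgan_strength'
    'enable_upscale': True,
}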

View File

@ -334,11 +334,11 @@ class GeneratorService:
        # TODO: Support no generation (just upscaling/gfpgan)
        upscale = None if not jobRequest.enable_upscale else jobRequest.upscale
        gfpgan_strength = 0 if not jobRequest.enable_gfpgan else jobRequest.gfpgan_strength
        facetool_strength = 0 if not jobRequest.enable_gfpgan else jobRequest.facetool_strength
        if not jobRequest.enable_generate:
            # If not generating, check if we're upscaling or running gfpgan
            if not upscale and not gfpgan_strength:
            if not upscale and not facetool_strength:
                # Invalid settings (TODO: Add message to help user)
                raise CanceledException()
@ -347,7 +347,7 @@ class GeneratorService:
            self.__model.upscale_and_reconstruct(
                image_list = [[image,0]],
                upscale = upscale,
                strength = gfpgan_strength,
                strength = facetool_strength,
                save_original = False,
                image_callback = lambda image, seed, upscaled=False: self.__on_image_result(jobRequest, image, seed, upscaled))
@ -371,10 +371,11 @@ class GeneratorService:
                steps = jobRequest.steps,
                variation_amount = jobRequest.variation_amount,
                with_variations = jobRequest.with_variations,
                gfpgan_strength = gfpgan_strength,
                facetool_strength = facetool_strength,
                upscale = upscale,
                sampler_name = jobRequest.sampler_name,
                seamless = jobRequest.seamless,
                hires_fix = jobRequest.hires_fix,
                embiggen = jobRequest.embiggen,
                embiggen_tiles = jobRequest.embiggen_tiles,
                step_callback = lambda sample, step: self.__on_progress(jobRequest, sample, step),

View File

@ -2,7 +2,7 @@ from setuptools import setup, find_packages
setup(
    name='invoke-ai',
    version='2.0.0',
    version='2.0.2',
    description='',
    packages=find_packages(),
    install_requires=[

162
shell.nix Normal file
View File

@ -0,0 +1,162 @@
{ pkgs ? import <nixpkgs> {}
, lib ? pkgs.lib
, stdenv ? pkgs.stdenv
, fetchurl ? pkgs.fetchurl
, runCommand ? pkgs.runCommand
, makeWrapper ? pkgs.makeWrapper
, mkShell ? pkgs.mkShell
, buildFHSUserEnv ? pkgs.buildFHSUserEnv
, frameworks ? pkgs.darwin.apple_sdk.frameworks
}:
# Setup InvokeAI environment using nix
# Simple usage:
# nix-shell
# python3 scripts/preload_models.py
# python3 scripts/invoke.py -h
let
conda-shell = { url, sha256, installPath, packages, shellHook }:
let
src = fetchurl { inherit url sha256; };
libPath = lib.makeLibraryPath ([] ++ lib.optionals (stdenv.isLinux) [ pkgs.zlib ]);
condaArch = if stdenv.system == "aarch64-darwin" then "osx-arm64" else "";
installer =
if stdenv.isDarwin then
runCommand "conda-install" {
nativeBuildInputs = [ makeWrapper ];
} ''
mkdir -p $out/bin
cp ${src} $out/bin/miniconda-installer.sh
chmod +x $out/bin/miniconda-installer.sh
makeWrapper \
$out/bin/miniconda-installer.sh \
$out/bin/conda-install \
--add-flags "-p ${installPath}" \
--add-flags "-b"
''
else if stdenv.isLinux then
runCommand "conda-install" {
nativeBuildInputs = [ makeWrapper ];
buildInputs = [ pkgs.zlib ];
}
# On line 10 of the installer script we have 'unset LD_LIBRARY_PATH'.
# We have to comment it out, but in a way that does not change the number of
# bytes in the file, so we replace the 'u' in that line with a '#'.
# The reason is that the offset of the binary payload is encoded as a byte
# count from the top of the installer script, and unsetting the library path
# would prevent the zlib library from being discovered.
''
mkdir -p $out/bin
sed 's/unset LD_LIBRARY_PATH/#nset LD_LIBRARY_PATH/' ${src} > $out/bin/miniconda-installer.sh
chmod +x $out/bin/miniconda-installer.sh
makeWrapper \
$out/bin/miniconda-installer.sh \
$out/bin/conda-install \
--add-flags "-p ${installPath}" \
--add-flags "-b" \
--prefix "LD_LIBRARY_PATH" : "${libPath}"
''
else {};
hook = ''
export CONDA_SUBDIR=${condaArch}
'' + shellHook;
fhs = buildFHSUserEnv {
name = "conda-shell";
targetPkgs = pkgs: [ stdenv.cc pkgs.git installer ] ++ packages;
profile = hook;
runScript = "bash";
};
shell = mkShell {
shellHook = if stdenv.isDarwin then hook else "conda-shell; exit";
packages = if stdenv.isDarwin then [ pkgs.git installer ] ++ packages else [ fhs ];
};
in shell;
packages = with pkgs; [
cmake
protobuf
libiconv
rustc
cargo
rustPlatform.bindgenHook
];
env = {
aarch64-darwin = {
envFile = "environment-mac.yml";
condaPath = (builtins.toString ./.) + "/.conda";
ptrSize = "8";
};
x86_64-linux = {
envFile = "environment.yml";
condaPath = (builtins.toString ./.) + "/.conda";
ptrSize = "8";
};
};
envFile = env.${stdenv.system}.envFile;
installPath = env.${stdenv.system}.condaPath;
ptrSize = env.${stdenv.system}.ptrSize;
shellHook = ''
conda-install
# tmpdir is too small in nix
export TMPDIR="${installPath}/tmp"
# Add conda to PATH
export PATH="${installPath}/bin:$PATH"
# Allows `conda activate` to work properly
source ${installPath}/etc/profile.d/conda.sh
# Paths for gcc if compiling some C sources with pip
export NIX_CFLAGS_COMPILE="-I${installPath}/include -I$TMPDIR/include"
export NIX_CFLAGS_LINK="-L${installPath}/lib $BINDGEN_EXTRA_CLANG_ARGS"
export PIP_EXISTS_ACTION=w
# rust-onig fails (think it writes config.h to wrong location)
mkdir -p "$TMPDIR/include"
cat <<'EOF' > "$TMPDIR/include/config.h"
#define HAVE_PROTOTYPES 1
#define STDC_HEADERS 1
#define HAVE_STRING_H 1
#define HAVE_STDARG_H 1
#define HAVE_STDLIB_H 1
#define HAVE_LIMITS_H 1
#define HAVE_INTTYPES_H 1
#define SIZEOF_INT 4
#define SIZEOF_SHORT 2
#define SIZEOF_LONG ${ptrSize}
#define SIZEOF_VOIDP ${ptrSize}
#define SIZEOF_LONG_LONG 8
EOF
conda env create -f "${envFile}" || conda env update --prune -f "${envFile}"
conda activate invokeai
'';
version = "4.12.0";
conda = {
aarch64-darwin = {
shell = conda-shell {
inherit shellHook installPath;
url = "https://repo.anaconda.com/miniconda/Miniconda3-py39_${version}-MacOSX-arm64.sh";
sha256 = "4bd112168cc33f8a4a60d3ef7e72b52a85972d588cd065be803eb21d73b625ef";
packages = [ frameworks.Security ] ++ packages;
};
};
x86_64-linux = {
shell = conda-shell {
inherit shellHook installPath;
url = "https://repo.continuum.io/miniconda/Miniconda3-py39_${version}-Linux-x86_64.sh";
sha256 = "78f39f9bae971ec1ae7969f0516017f2413f17796670f7040725dd83fcff5689";
packages = with pkgs; [ libGL glib ] ++ packages;
};
};
};
in conda.${stdenv.system}.shell

View File

@ -144,8 +144,8 @@
<input type="checkbox" name="enable_gfpgan" id="enable_gfpgan">
<label for="enable_gfpgan">Enable gfpgan</label>
</legend>
<label title="Strength of the gfpgan (face fixing) algorithm." for="gfpgan_strength">GPFGAN Strength:</label>
<input value="0.8" min="0" max="1" type="number" id="gfpgan_strength" name="gfpgan_strength" step="0.05">
<label title="Strength of the gfpgan (face fixing) algorithm." for="facetool_strength">GPFGAN Strength:</label>
<input value="0.8" min="0" max="1" type="number" id="facetool_strength" name="facetool_strength" step="0.05">
</fieldset>
<fieldset id="upscale">
<legend>

View File

@ -100,8 +100,8 @@
</fieldset>
<fieldset id="gfpgan">
<div class="section-header">Post-processing options</div>
<label title="Strength of the gfpgan (face fixing) algorithm." for="gfpgan_strength">GPFGAN Strength (0 to disable):</label>
<input value="0.0" min="0" max="1" type="number" id="gfpgan_strength" name="gfpgan_strength" step="0.1">
<label title="Strength of the gfpgan (face fixing) algorithm." for="facetool_strength">GPFGAN Strength (0 to disable):</label>
<input value="0.0" min="0" max="1" type="number" id="facetool_strength" name="facetool_strength" step="0.1">
<label title="Upscaling to perform using ESRGAN." for="upscale_level">Upscaling Level</label>
<select id="upscale_level" name="upscale_level" value="">
<option value="" selected>None</option>