Compare commits
23 Commits
developmen
...
2.1.3-rc1
Author | SHA1 | Date | |
---|---|---|---|
efc5a98488 | |||
1417c87928 | |||
2dd6fc2b93 | |||
22213612a0 | |||
71ee44a827 | |||
b17ca0a5e7 | |||
71bbfe4a1a | |||
5702271991 | |||
10781e7dc4 | |||
099d1157c5 | |||
ab825bf7ee | |||
10cfeb5ada | |||
e97515d045 | |||
0f04bc5789 | |||
3f74aabecd | |||
b1a99a51b7 | |||
8004f8a6d9 | |||
ff8ff2212a | |||
8e5363cd83 | |||
1450779146 | |||
8cd5d95b8a | |||
abd6407394 | |||
734dacfbe9 |
15
.gitignore
vendored
@ -194,6 +194,10 @@ checkpoints
|
||||
|
||||
# Let the frontend manage its own gitignore
|
||||
!frontend/*
|
||||
frontend/apt-get
|
||||
frontend/dist
|
||||
frontend/sudo
|
||||
frontend/update
|
||||
|
||||
# Scratch folder
|
||||
.scratch/
|
||||
@ -201,6 +205,7 @@ checkpoints
|
||||
gfpgan/
|
||||
models/ldm/stable-diffusion-v1/*.sha256
|
||||
|
||||
|
||||
# GFPGAN model files
|
||||
gfpgan/
|
||||
|
||||
@ -209,6 +214,16 @@ configs/models.yaml
|
||||
|
||||
# weights (will be created by installer)
|
||||
models/ldm/stable-diffusion-v1/*.ckpt
|
||||
models/clipseg
|
||||
models/gfpgan
|
||||
|
||||
# ignore initfile
|
||||
invokeai.init
|
||||
|
||||
# ignore environment.yml and requirements.txt
|
||||
# these are links to the real files in environments-and-requirements
|
||||
environment.yml
|
||||
requirements.txt
|
||||
|
||||
# this may be present if the user created a venv
|
||||
invokeai
|
||||
|
@ -68,12 +68,12 @@ set PATH=%INSTALL_ENV_DIR%;%INSTALL_ENV_DIR%\Library\bin;%INSTALL_ENV_DIR%\Scrip
|
||||
|
||||
@rem get the repo (and load into the current directory)
|
||||
if not exist ".git" (
|
||||
call git config --global init.defaultBranch main
|
||||
call git init
|
||||
call git config --local init.defaultBranch main
|
||||
call git remote add origin %REPO_URL%
|
||||
call git fetch
|
||||
# call git checkout origin/main -ft
|
||||
call git checkout origin/release-candidate-2-1 -ft
|
||||
call git checkout origin/release-candidate-2-1-3 -ft
|
||||
)
|
||||
|
||||
@rem activate the base env
|
||||
@ -81,6 +81,7 @@ call conda activate
|
||||
|
||||
@rem create the environment
|
||||
call conda env remove -n invokeai
|
||||
mklink environment.yml environments-and-requirements\environment-win-cuda.yml
|
||||
call conda env create
|
||||
if "%ERRORLEVEL%" NEQ "0" (
|
||||
echo ""
|
@ -42,9 +42,8 @@ umamba_exists="F"
|
||||
if [ -e "$INSTALL_ENV_DIR" ]; then export PATH="$INSTALL_ENV_DIR/bin:$PATH"; fi
|
||||
|
||||
PACKAGES_TO_INSTALL=""
|
||||
|
||||
if ! hash "conda" &>/dev/null; then PACKAGES_TO_INSTALL="$PACKAGES_TO_INSTALL conda"; fi
|
||||
if ! hash "git" &>/dev/null; then PACKAGES_TO_INSTALL="$PACKAGES_TO_INSTALL git"; fi
|
||||
if ! $(which conda) -V &>/dev/null; then PACKAGES_TO_INSTALL="$PACKAGES_TO_INSTALL conda"; fi
|
||||
if ! which git &>/dev/null; then PACKAGES_TO_INSTALL="$PACKAGES_TO_INSTALL git"; fi
|
||||
|
||||
if "$MAMBA_ROOT_PREFIX/micromamba" --version &>/dev/null; then umamba_exists="T"; fi
|
||||
|
||||
@ -83,11 +82,11 @@ if [ -e "$INSTALL_ENV_DIR" ]; then export PATH="$INSTALL_ENV_DIR/bin:$PATH"; fi
|
||||
|
||||
# get the repo (and load into the current directory)
|
||||
if [ ! -e ".git" ]; then
|
||||
git config --global init.defaultBranch main
|
||||
git init
|
||||
git config --local init.defaultBranch main
|
||||
git remote add origin "$REPO_URL"
|
||||
git fetch
|
||||
git checkout origin/release-candidate-2-1 -ft
|
||||
git checkout origin/release-candidate-2-1-3 -ft
|
||||
fi
|
||||
|
||||
# create the environment
|
||||
@ -95,13 +94,19 @@ CONDA_BASEPATH=$(conda info --base)
|
||||
source "$CONDA_BASEPATH/etc/profile.d/conda.sh" # otherwise conda complains about 'shell not initialized' (needed when running in a script)
|
||||
|
||||
conda activate
|
||||
|
||||
if [ "$OS_NAME" == "mac" ]; then
|
||||
PIP_EXISTS_ACTION=w CONDA_SUBDIR=osx-${OS_ARCH} conda env create -f environment-mac.yml
|
||||
echo "Macintosh system detected. Installing MPS and CPU support."
|
||||
ln -sf environments-and-requirements/environment-mac.yml environment.yml
|
||||
else
|
||||
conda env remove -n invokeai
|
||||
conda env create -f environment.yml
|
||||
if (lsmod | grep amdgpu) &>/dev/null ; then
|
||||
echo "Linux system with AMD GPU driver detected. Installing ROCm and CPU support"
|
||||
ln -sf environments-and-requirements/environment-lin-amd.yml environment.yml
|
||||
else
|
||||
echo "Linux system detected. Installing CUDA and CPU support."
|
||||
ln -sf environments-and-requirements/environment-lin-cuda.yml environment.yml
|
||||
fi
|
||||
fi
|
||||
conda env update
|
||||
|
||||
status=$?
|
||||
|
||||
@ -125,6 +130,6 @@ else
|
||||
# tell the user their next steps
|
||||
echo "You can now start generating images by running invoke.sh (inside this folder), using ./invoke.sh"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
conda activate invokeai
|
Before Width: | Height: | Size: 14 KiB After Width: | Height: | Size: 14 KiB |
Before Width: | Height: | Size: 466 KiB After Width: | Height: | Size: 466 KiB |
Before Width: | Height: | Size: 7.4 KiB After Width: | Height: | Size: 7.4 KiB |
Before Width: | Height: | Size: 539 KiB After Width: | Height: | Size: 539 KiB |
Before Width: | Height: | Size: 7.6 KiB After Width: | Height: | Size: 7.6 KiB |
Before Width: | Height: | Size: 450 KiB After Width: | Height: | Size: 450 KiB |
Before Width: | Height: | Size: 12 KiB After Width: | Height: | Size: 12 KiB |
Before Width: | Height: | Size: 553 KiB After Width: | Height: | Size: 553 KiB |
Before Width: | Height: | Size: 12 KiB After Width: | Height: | Size: 12 KiB |
Before Width: | Height: | Size: 418 KiB After Width: | Height: | Size: 418 KiB |
Before Width: | Height: | Size: 6.1 KiB After Width: | Height: | Size: 6.1 KiB |
Before Width: | Height: | Size: 542 KiB After Width: | Height: | Size: 542 KiB |
Before Width: | Height: | Size: 9.5 KiB After Width: | Height: | Size: 9.5 KiB |
Before Width: | Height: | Size: 395 KiB After Width: | Height: | Size: 395 KiB |
Before Width: | Height: | Size: 12 KiB After Width: | Height: | Size: 12 KiB |
Before Width: | Height: | Size: 465 KiB After Width: | Height: | Size: 465 KiB |
Before Width: | Height: | Size: 7.8 KiB After Width: | Height: | Size: 7.8 KiB |
@ -111,7 +111,7 @@ A number of caveats:
|
||||
the border.
|
||||
|
||||
4. When using the `inpaint-1.5` model, you may notice subtle changes to the area
|
||||
within the original image. This is because the model performs an
|
||||
outside the masked region. This is because the model performs an
|
||||
encoding/decoding on the image as a whole. This does not occur with the
|
||||
standard model.
|
||||
|
||||
|
@ -94,6 +94,7 @@ installation instructions below.
|
||||
You will need one of the following:
|
||||
|
||||
- :simple-nvidia: An NVIDIA-based graphics card with 4 GB or more VRAM memory.
|
||||
- :simple-amd: An AMD-based graphics card with 4 GB or more VRAM memory (Linux only)
|
||||
- :fontawesome-brands-apple: An Apple computer with an M1 chip.
|
||||
|
||||
### :fontawesome-solid-memory: Memory
|
||||
|
58
docs/installation/INSTALL.md
Normal file
@ -0,0 +1,58 @@
|
||||
---
|
||||
title: Installation Overview
|
||||
---
|
||||
|
||||
## Installation
|
||||
|
||||
We offer several ways to install InvokeAI, each one suited to your
|
||||
experience and preferences.
|
||||
|
||||
1. [1-click installer](INSTALL_1CLICK.md)
|
||||
|
||||
This is an automated shell script that will handle installation of
|
||||
all dependencies for you, and is recommended for those who have
|
||||
limited or no experience with the Python programming language, are
|
||||
not currently interested in contributing to the project, and just want
|
||||
the thing to install and run. In this version, you interact with the
|
||||
web server and command-line clients through a shell script named
|
||||
`invoke.sh` (Linux/Mac) or `invoke.bat` (Windows), and perform
|
||||
updates using `update.sh` and `update.bat`.
|
||||
|
||||
2. [Pre-compiled PIP installer](INSTALL_PCP.md)
|
||||
|
||||
This is a series of installer files for which all the requirements
|
||||
for InvokeAI have been precompiled, thereby preventing the conflicts
|
||||
that sometimes occur when an external library is changed unexpectedly.
|
||||
It will leave you with an environment in which you interact directly
|
||||
with the scripts for running the web and command line clients, and
|
||||
you will update to new versions using standard developer commands.
|
||||
|
||||
This method is recommended for users with a bit of experience using
|
||||
the `git` and `pip` tools.
|
||||
|
||||
3. [Manual Installation](MANUAL_INSTALL.md)
|
||||
|
||||
In this method you will manually run the commands needed to install
|
||||
InvokeAI and its dependencies. We offer two recipes: one suited to
|
||||
those who prefer the `conda` tool, and one suited to those who prefer
|
||||
`pip` and Python virtual environments.
|
||||
|
||||
This method is recommended for users who have used `conda`
|
||||
or `pip` in the past, developers, and anyone who wishes to remain on
|
||||
the cutting edge of future InvokeAI development and is willing to put
|
||||
up with occasional glitches and breakage.
|
||||
|
||||
4. [Docker Installation](INSTALL_DOCKER.md)
|
||||
|
||||
We also offer a method for creating Docker containers containing
|
||||
InvokeAI and its dependencies. This method is recommended for
|
||||
individuals who have experience with Docker containers and understand
|
||||
the pluses and minuses of a container-based install.
|
||||
|
||||
5. [Jupyter Notebooks Installation](INSTALL_JUPYTER.md)
|
||||
|
||||
This method is suitable for running InvokeAI on a Google Colab
|
||||
account. It is recommended for individuals who have previously
|
||||
worked on the Colab and are comfortable with the Jupyter notebook
|
||||
environment.
|
||||
|
@ -1,12 +1,12 @@
|
||||
---
|
||||
title: Docker
|
||||
Title: Docker
|
||||
---
|
||||
|
||||
# :fontawesome-brands-docker: Docker
|
||||
|
||||
## Before you begin
|
||||
|
||||
- For end users: Install Stable Diffusion locally using the instructions for
|
||||
- For end users: Install InvokeAI locally using the instructions for
|
||||
your OS.
|
||||
- For developers: For container-related development tasks or for enabling easy
|
||||
deployment to other environments (on-premises or cloud), follow these
|
||||
@ -14,7 +14,7 @@ title: Docker
|
||||
|
||||
## Why containers?
|
||||
|
||||
They provide a flexible, reliable way to build and deploy Stable Diffusion.
|
||||
They provide a flexible, reliable way to build and deploy InvokeAI.
|
||||
You'll also use a Docker volume to store the largest model files and image
|
||||
outputs as a first step in decoupling storage and compute. Future enhancements
|
||||
can do this for other assets. See [Processes](https://12factor.net/processes)
|
||||
@ -22,7 +22,7 @@ under the Twelve-Factor App methodology for details on why running applications
|
||||
in such a stateless fashion is important.
|
||||
|
||||
You can specify the target platform when building the image and running the
|
||||
container. You'll also need to specify the Stable Diffusion requirements file
|
||||
container. You'll also need to specify the InvokeAI requirements file
|
||||
that matches the container's OS and the architecture it will run on.
|
||||
|
||||
Developers on Apple silicon (M1/M2): You
|
||||
|
@ -42,14 +42,25 @@ title: Manual Installation, Linux
|
||||
```
|
||||
|
||||
5. Use anaconda to copy necessary python packages, create a new python
|
||||
environment named `invokeai` and activate the environment.
|
||||
environment named `invokeai` and then activate the environment.
|
||||
|
||||
```bash
|
||||
(base) rm -rf src # (this is a precaution in case there is already a src directory)
|
||||
(base) ~/InvokeAI$ conda env create
|
||||
(base) ~/InvokeAI$ conda activate invokeai
|
||||
(invokeai) ~/InvokeAI$
|
||||
```
|
||||
!!! todo "For systems with a CUDA (Nvidia) card:"
|
||||
|
||||
```bash
|
||||
(base) rm -rf src # (this is a precaution in case there is already a src directory)
|
||||
(base) ~/InvokeAI$ conda env create -f environment-cuda.yml
|
||||
(base) ~/InvokeAI$ conda activate invokeai
|
||||
(invokeai) ~/InvokeAI$
|
||||
```
|
||||
|
||||
!!! todo "For systems with an AMD card (using ROCm driver):"
|
||||
|
||||
```bash
|
||||
(base) rm -rf src # (this is a precaution in case there is already a src directory)
|
||||
(base) ~/InvokeAI$ conda env create -f environment-AMD.yml
|
||||
(base) ~/InvokeAI$ conda activate invokeai
|
||||
(invokeai) ~/InvokeAI$
|
||||
```
|
||||
|
||||
After these steps, your command prompt will be prefixed by `(invokeai)` as
|
||||
shown above.
|
||||
|
@ -13,22 +13,9 @@ one of the steps, so instead of cloning this repo, simply download the notebook
|
||||
from the link above and load it up in VSCode (with the appropriate extensions
|
||||
installed)/Jupyter/JupyterLab and start running the cells one-by-one.
|
||||
|
||||
Note that you will need NVIDIA drivers, Python 3.10, and Git installed
|
||||
beforehand - simplified
|
||||
[step-by-step instructions](https://github.com/invoke-ai/InvokeAI/wiki/Easy-peasy-Windows-install)
|
||||
are available in the wiki (you'll only need steps 1, 2, & 3 ).
|
||||
Note that you will need NVIDIA drivers, Python 3.10, and Git installed beforehand.
|
||||
|
||||
## **Manual Install**
|
||||
|
||||
### **pip**
|
||||
|
||||
See
|
||||
[Easy-peasy Windows install](https://github.com/invoke-ai/InvokeAI/wiki/Easy-peasy-Windows-install)
|
||||
in the wiki
|
||||
|
||||
---
|
||||
|
||||
### **Conda**
|
||||
## **Manual Install with Conda**
|
||||
|
||||
1. Install Anaconda3 (miniconda3 version) from [here](https://docs.anaconda.com/anaconda/install/windows/)
|
||||
|
||||
@ -52,23 +39,29 @@ in the wiki
|
||||
cd InvokeAI
|
||||
```
|
||||
|
||||
6. Run the following two commands:
|
||||
6. Run the following commands:
|
||||
|
||||
```batch title="step 6a"
|
||||
conda env create
|
||||
```
|
||||
!!! todo "For systems with a CUDA (Nvidia) card:"
|
||||
|
||||
```batch title="step 6b"
|
||||
conda activate invokeai
|
||||
```
|
||||
```bash
|
||||
rmdir src # (this is a precaution in case there is already a src directory)
|
||||
conda env create -f environment-cuda.yml
|
||||
conda activate invokeai
|
||||
(invokeai)>
|
||||
```
|
||||
|
||||
!!! todo "For systems with an AMD card (using ROCm driver):"
|
||||
|
||||
```bash
|
||||
rmdir src # (this is a precaution in case there is already a src directory)
|
||||
conda env create -f environment-AMD.yml
|
||||
conda activate invokeai
|
||||
(invokeai)>
|
||||
```
|
||||
|
||||
This will install all python requirements and activate the "invokeai" environment
|
||||
which sets PATH and other environment variables properly.
|
||||
|
||||
Note that the long form of the first command is `conda env create -f environment.yml`. If the
|
||||
environment file isn't specified, conda will default to `environment.yml`. You will need
|
||||
to provide the `-f` option if you wish to load a different environment file at any point.
|
||||
|
||||
7. Load the big stable diffusion weights files and a couple of smaller machine-learning models:
|
||||
|
||||
```bash
|
||||
|
363
docs/installation/MANUAL_INSTALL.md
Normal file
@ -0,0 +1,363 @@
|
||||
---
|
||||
title: Manual Installation
|
||||
---
|
||||
|
||||
# :fontawesome-brands-linux: Linux
|
||||
# :fontawesome-brands-apple: macOS
|
||||
# :fontawesome-brands-windows: Windows
|
||||
|
||||
## Introduction
|
||||
|
||||
You have two choices for manual installation, the [first
|
||||
one](#Conda_method) based on the Anaconda3 package manager (`conda`),
|
||||
and [a second one](#PIP_method) which uses basic Python virtual
|
||||
environment (`venv`) commands and the PIP package manager. Both
|
||||
methods require you to enter commands on the command-line shell, also
|
||||
known as the "console".
|
||||
|
||||
On Windows systems you are encouraged to install and use the
|
||||
[Powershell](https://learn.microsoft.com/en-us/powershell/scripting/install/installing-powershell-on-windows?view=powershell-7.3),
|
||||
which provides compatibility with Linux and Mac shells and nice
|
||||
features such as command-line completion.
|
||||
|
||||
### Conda method
|
||||
|
||||
1. Check that your system meets the [hardware
|
||||
requirements](index.md#Hardware_Requirements) and has the appropriate
|
||||
GPU drivers installed. In particular, if you are a Linux user with an
|
||||
AMD GPU installed, you may need to install the [ROCm
|
||||
driver](https://rocmdocs.amd.com/en/latest/Installation_Guide/Installation-Guide.html).
|
||||
|
||||
InvokeAI does not yet support Windows machines with AMD GPUs due to
|
||||
the lack of ROCm driver support on this platform.
|
||||
|
||||
To confirm that the appropriate drivers are installed, run
|
||||
`nvidia-smi` on NVIDIA/CUDA systems, and `rocm-smi` on AMD
|
||||
systems. These should return information about the installed video
|
||||
card.
|
||||
|
||||
Macintosh users with MPS acceleration, or anybody with a CPU-only
|
||||
system, can skip this step.
|
||||
|
||||
2. You will need to install Anaconda3 and Git if they are not already
|
||||
available. Use your operating system's preferred installer, or
|
||||
download installers from the following URLs:
|
||||
|
||||
- Anaconda3 (https://www.anaconda.com/)
|
||||
- git (https://git-scm.com/downloads)
|
||||
|
||||
3. Copy the InvokeAI source code from GitHub using `git`:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/invoke-ai/InvokeAI.git
|
||||
```
|
||||
|
||||
This will create an InvokeAI folder where you will follow the rest of the
|
||||
steps.
|
||||
|
||||
3. Enter the newly-created InvokeAI folder. From this step forward make sure
|
||||
that you are working in the InvokeAI directory!
|
||||
|
||||
```bash
|
||||
cd InvokeAI
|
||||
```
|
||||
4. Select the appropriate environment file:
|
||||
|
||||
We have created a series of environment files suited for different
|
||||
operating systems and GPU hardware. They are located in the
|
||||
`environments-and-requirements` directory:
|
||||
|
||||
```bash
|
||||
environment-lin-amd.yml # Linux with an AMD (ROCm) GPU
|
||||
environment-lin-cuda.yml # Linux with an NVIDIA CUDA GPU
|
||||
environment-mac.yml # Macintoshes with MPS acceleration
|
||||
environment-win-cuda.yml # Windows with an NVIDIA CUDA GPU
|
||||
```
|
||||
|
||||
Select the appropriate environment file, and make a link to it
|
||||
from `environment.yml` in the top-level InvokeAI directory. The
|
||||
command to do this from the top-level directory is:
|
||||
|
||||
!!! todo "Macintosh and Linux"
|
||||
|
||||
```bash
|
||||
ln -sf environments-and-requirements/environment-xxx-yyy.yml environment.yml
|
||||
```
|
||||
|
||||
Replace `xxx` and `yyy` with the appropriate OS and GPU codes.
|
||||
|
||||
!!! todo "Windows"
|
||||
|
||||
```bash
|
||||
mklink environment.yml environments-and-requirements\environment-win-cuda.yml
|
||||
```
|
||||
|
||||
Note that the order of arguments is reversed between the Linux/Mac and Windows
|
||||
commands!
|
||||
|
||||
When this is done, confirm that a file `environment.yml` has been created in
|
||||
the InvokeAI root directory and that it points to the correct file in the
|
||||
`environments-and-requirements` directory.
|
||||
|
||||
4. Run conda:
|
||||
|
||||
```bash
|
||||
conda env update
|
||||
```
|
||||
|
||||
This will create a new environment named `invokeai` and install all
|
||||
InvokeAI dependencies into it.
|
||||
|
||||
If something goes wrong at this point, see
|
||||
[troubleshooting](#Troubleshooting).
|
||||
|
||||
5. Activate the `invokeai` environment:
|
||||
|
||||
```bash
|
||||
conda activate invokeai
|
||||
```
|
||||
|
||||
Your command-line prompt should change to indicate that `invokeai` is active.
|
||||
|
||||
6. Load the model weights files:
|
||||
|
||||
```bash
|
||||
python scripts/preload_models.py
|
||||
```
|
||||
|
||||
(Windows users should use the backslash instead of the slash)
|
||||
|
||||
The script `preload_models.py` will interactively guide you through
|
||||
downloading and installing the weights files needed for
|
||||
InvokeAI. Note that the main Stable Diffusion weights file is
|
||||
protected by a license agreement that you have to agree to. The
|
||||
script will list the steps you need to take to create an account on
|
||||
the site that hosts the weights files, accept the agreement, and
|
||||
provide an access token that allows InvokeAI to legally download
|
||||
and install the weights files.
|
||||
|
||||
If you get an error message about a module not being installed,
|
||||
check that the `invokeai` environment is active and if not, repeat
|
||||
step 5.
|
||||
|
||||
7. Run the command-line interface or the web interface:
|
||||
|
||||
```bash
|
||||
python scripts/invoke.py # command line
|
||||
python scripts/invoke.py --web # web interface
|
||||
```
|
||||
|
||||
(Windows users should replace the forward slash with a backslash)
|
||||
|
||||
If you choose to run the web interface, point your browser at
|
||||
http://localhost:9090 in order to load the GUI.
|
||||
|
||||
8. Render away!
|
||||
|
||||
Browse the features listed in the [Stable Diffusion Toolkit
|
||||
Docs](https://invoke-ai.github.io/InvokeAI/) to learn about all the things you can
|
||||
do with InvokeAI.
|
||||
|
||||
Note that some GPUs are slow to warm up. In particular, when using
|
||||
an AMD card with the ROCm driver, you may have to wait for over a
|
||||
minute the first time you try to generate an image. Fortunately, after
|
||||
the warm up period rendering will be fast.
|
||||
|
||||
9. Subsequently, to relaunch the script, be sure to run "conda
|
||||
activate invokeai", enter the `InvokeAI` directory, and then launch
|
||||
the invoke script. If you forget to activate the 'invokeai'
|
||||
environment, the script will fail with multiple `ModuleNotFound`
|
||||
errors.
|
||||
|
||||
## Updating to newer versions of the script
|
||||
|
||||
This distribution is changing rapidly. If you used the `git clone` method
|
||||
(step 3) to download the InvokeAI directory, then to update to the latest and
|
||||
greatest version, launch the Anaconda window, enter `InvokeAI` and type:
|
||||
|
||||
```bash
|
||||
git pull
|
||||
conda env update
|
||||
python scripts/preload_models.py --no-interactive #optional
|
||||
```
|
||||
|
||||
This will bring your local copy into sync with the remote one. The
|
||||
last step may be needed to take advantage of new features or released
|
||||
models. The `--no-interactive` flag will prevent the script from
|
||||
prompting you to download the big Stable Diffusion weights files.
|
||||
|
||||
## pip Install
|
||||
|
||||
To install InvokeAI with only the PIP package manager, please follow
|
||||
these steps:
|
||||
|
||||
1. Make sure you are using Python 3.9 or higher. Some InvokeAI
|
||||
features require this:
|
||||
|
||||
```bash
|
||||
python -V
|
||||
```
|
||||
|
||||
2. Install the `virtualenv` tool if you don't have it already:
|
||||
```bash
|
||||
pip install virtualenv
|
||||
```
|
||||
|
||||
3. From within the InvokeAI top-level directory, create and activate a
|
||||
virtual environment named `invokeai`:
|
||||
|
||||
```bash
|
||||
virtualenv invokeai
|
||||
source invokeai/bin/activate
|
||||
```
|
||||
|
||||
4. Pick the correct `requirements*.txt` file for your hardware and
|
||||
operating system.
|
||||
|
||||
We have created a series of requirements files suited for different
|
||||
operating systems and GPU hardware. They are located in the
|
||||
`environments-and-requirements` directory:
|
||||
|
||||
```bash
|
||||
requirements-lin-amd.txt # Linux with an AMD (ROCm) GPU
|
||||
requirements-lin-arm64.txt # Linux running on arm64 systems
|
||||
requirements-lin-cuda.txt # Linux with an NVIDIA (CUDA) GPU
|
||||
requirements-mac-mps-cpu.txt # Macintoshes with MPS acceleration
|
||||
requirements-lin-win-colab-cuda.txt # Windows with an NVIDIA (CUDA) GPU
|
||||
# (supports Google Colab too)
|
||||
```
|
||||
|
||||
Select the appropriate requirements file, and make a link to it
|
||||
from `requirements.txt` in the top-level InvokeAI directory. The
|
||||
command to do this from the top-level directory is:
|
||||
|
||||
!!! todo "Macintosh and Linux"
|
||||
|
||||
```bash
|
||||
ln -sf environments-and-requirements/requirements-xxx-yyy.txt requirements.txt
|
||||
```
|
||||
|
||||
Replace `xxx` and `yyy` with the appropriate OS and GPU codes.
|
||||
|
||||
!!! todo "Windows"
|
||||
|
||||
```bash
|
||||
mklink requirements.txt environments-and-requirements\requirements-lin-win-colab-cuda.txt
|
||||
```
|
||||
|
||||
Note that the order of arguments is reversed between the Linux/Mac and Windows
|
||||
commands!
|
||||
|
||||
Please do not link directly to the file
|
||||
`environments-and-requirements/requirements.txt`. This is a base requirements
|
||||
file that does not have the platform-specific libraries.
|
||||
|
||||
When this is done, confirm that a file `requirements.txt` has been
|
||||
created in the InvokeAI root directory and that it points to the
|
||||
correct file in the `environments-and-requirements` directory.
|
||||
|
||||
5. Run PIP
|
||||
|
||||
Be sure that the `invokeai` environment is active before doing
|
||||
this:
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
Here are some common issues and their suggested solutions.
|
||||
|
||||
### Conda install
|
||||
|
||||
1. Conda fails before completing `conda env update`:
|
||||
|
||||
The usual source of these errors is a package
|
||||
incompatibility. While we have tried to minimize these, over time
|
||||
packages get updated and sometimes introduce incompatibilities.
|
||||
|
||||
We suggest that you search [Issues](https://github.com/invoke-ai/InvokeAI/issues) or the
|
||||
Bug Report and Support channel of the [InvokeAI Discord](https://discord.gg/ZmtBAhwWhy).
|
||||
|
||||
You may also try to install the broken packages manually using PIP. To do this, activate
|
||||
the `invokeai` environment, and run `pip install` with the name and version of the
|
||||
package that is causing the incompatibility. For example:
|
||||
|
||||
```bash
|
||||
pip install test-tube==0.7.5
|
||||
```
|
||||
|
||||
You can keep doing this until all requirements are satisfied and
|
||||
the `invoke.py` script runs without errors. Please report to
|
||||
[Issues](https://github.com/invoke-ai/InvokeAI/issues) what you
|
||||
were able to do to work around the problem so that others can
|
||||
benefit from your investigation.
|
||||
|
||||
2. `preload_models.py` or `invoke.py` crashes at an early stage
|
||||
|
||||
This is usually due to an incomplete or corrupted Conda install.
|
||||
Make sure you have linked to the correct environment file and run
|
||||
`conda env update` again.
|
||||
|
||||
If the problem persists, a more extreme measure is to clear Conda's
|
||||
caches and remove the `invokeai` environment:
|
||||
|
||||
```bash
|
||||
conda deactivate
|
||||
conda env remove -n invokeai
|
||||
conda clean -a
|
||||
conda env update
|
||||
```
|
||||
|
||||
This removes all cached library files, including ones that may have
|
||||
been corrupted somehow. (This is not supposed to happen, but does
|
||||
anyway).
|
||||
|
||||
3. `invoke.py` crashes at a later stage.
|
||||
|
||||
If the CLI or web site had been working ok, but something
|
||||
unexpected happens later on during the session, you've encountered
|
||||
a code bug that is probably unrelated to an install issue. Please
|
||||
search [Issues](https://github.com/invoke-ai/InvokeAI/issues), file
|
||||
a bug report, or ask for help on [Discord](https://discord.gg/ZmtBAhwWhy)
|
||||
|
||||
4. My renders are running very slowly!
|
||||
|
||||
You may have installed the wrong torch (machine learning) package,
|
||||
and the system is running on CPU rather than the GPU. To check,
|
||||
look at the log messages that appear when `invoke.py` is first
|
||||
starting up. One of the earlier lines should say `Using device type
|
||||
cuda`. On AMD systems, it will also say "cuda", and on Macintoshes,
|
||||
it should say "mps". If instead the message says it is running on
|
||||
"cpu", then you may need to install the correct torch library.
|
||||
|
||||
You may be able to fix this by installing a different torch
|
||||
library. Here are the magic incantations for Conda and PIP.
|
||||
|
||||
!!! todo "For CUDA systems"
|
||||
|
||||
(conda)
|
||||
```bash
|
||||
conda install pytorch torchvision torchaudio pytorch-cuda=11.6 -c pytorch -c nvidia
|
||||
```
|
||||
|
||||
(pip)
|
||||
```bash
|
||||
pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116
|
||||
```
|
||||
|
||||
!!! todo "For AMD systems"
|
||||
|
||||
(conda)
|
||||
```bash
|
||||
conda activate invokeai
|
||||
pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/rocm5.2/
|
||||
```
|
||||
|
||||
(pip)
|
||||
```bash
|
||||
pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/rocm5.2/
|
||||
```
|
||||
|
||||
More information and troubleshooting tips can be found at https://pytorch.org.
|
@ -3,43 +3,43 @@ channels:
|
||||
- pytorch
|
||||
- conda-forge
|
||||
dependencies:
|
||||
- python=3.9.*
|
||||
- pip>=22.2.2
|
||||
- albumentations=0.4.3
|
||||
- cudatoolkit
|
||||
- pytorch
|
||||
- torchvision
|
||||
- numpy=1.19
|
||||
- imageio=2.9.0
|
||||
- opencv=4.6.0
|
||||
- pillow=8.*
|
||||
- einops=0.3.0
|
||||
- eventlet
|
||||
- flask-socketio=5.3.0
|
||||
- flask=2.1.*
|
||||
- flask_cors=3.0.10
|
||||
- flask-socketio=5.3.0
|
||||
- send2trash=1.8.0
|
||||
- eventlet
|
||||
- albumentations=0.4.3
|
||||
- pudb=2019.2
|
||||
- imageio-ffmpeg=0.4.2
|
||||
- pytorch-lightning=1.7.7
|
||||
- streamlit
|
||||
- einops=0.3.0
|
||||
- imageio=2.9.0
|
||||
- kornia=0.6
|
||||
- torchmetrics=0.7.0
|
||||
- transformers=4.21.3
|
||||
- torch-fidelity=0.3.0
|
||||
- numpy=1.19
|
||||
- opencv=4.6.0
|
||||
- pillow=8.*
|
||||
- pip>=22.2.2
|
||||
- pudb=2019.2
|
||||
- python=3.9.*
|
||||
- pytorch
|
||||
- pytorch-lightning=1.7.7
|
||||
- send2trash=1.8.0
|
||||
- streamlit
|
||||
- tokenizers>=0.11.1,!=0.11.3,<0.13
|
||||
- torch-fidelity=0.3.0
|
||||
- torchmetrics=0.7.0
|
||||
- torchvision
|
||||
- transformers=4.21.3
|
||||
- pip:
|
||||
- getpass_asterisk
|
||||
- omegaconf==2.1.1
|
||||
- realesrgan==0.2.5.0
|
||||
- test-tube>=0.7.5
|
||||
- pyreadline3
|
||||
- dependency_injector==4.40.0
|
||||
- -e git+https://github.com/openai/CLIP.git@main#egg=clip
|
||||
- -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers
|
||||
- -e git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion
|
||||
- -e git+https://github.com/TencentARC/GFPGAN.git#egg=gfpgan
|
||||
- -e git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg
|
||||
- getpass_asterisk
|
||||
- gfpgan
|
||||
- omegaconf==2.1.1
|
||||
- pyreadline3
|
||||
- realesrgan
|
||||
- taming-transformers-rom1504
|
||||
- test-tube>=0.7.5
|
||||
- git+https://github.com/openai/CLIP.git@main#egg=clip
|
||||
- git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion
|
||||
- git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg
|
||||
- -e .
|
||||
variables:
|
||||
PYTORCH_ENABLE_MPS_FALLBACK: 1
|
45
environments-and-requirements/environment-lin-amd.yml
Normal file
@ -0,0 +1,45 @@
|
||||
name: invokeai
|
||||
channels:
|
||||
- pytorch
|
||||
- conda-forge
|
||||
- defaults
|
||||
dependencies:
|
||||
- python>=3.9
|
||||
- pip=22.2.2
|
||||
- numpy=1.23.3
|
||||
- pip:
|
||||
- --extra-index-url https://download.pytorch.org/whl/rocm5.2/
|
||||
- albumentations==0.4.3
|
||||
- dependency_injector==4.40.0
|
||||
- diffusers==0.6.0
|
||||
- einops==0.3.0
|
||||
- eventlet
|
||||
- flask==2.1.3
|
||||
- flask_cors==3.0.10
|
||||
- flask_socketio==5.3.0
|
||||
- getpass_asterisk
|
||||
- gfpgan
|
||||
- imageio-ffmpeg==0.4.2
|
||||
- imageio==2.9.0
|
||||
- kornia==0.6.0
|
||||
- omegaconf==2.2.3
|
||||
- opencv-python==4.5.5.64
|
||||
- pillow==9.2.0
|
||||
- pudb==2019.2
|
||||
- pyreadline3
|
||||
- pytorch-lightning==1.7.7
|
||||
- realesrgan
|
||||
- send2trash==1.8.0
|
||||
- streamlit==1.12.0
|
||||
- taming-transformers-rom1504
|
||||
- test-tube>=0.7.5
|
||||
- torch
|
||||
- torch-fidelity==0.3.0
|
||||
- torchaudio
|
||||
- torchmetrics==0.7.0
|
||||
- torchvision
|
||||
- transformers==4.21.3
|
||||
- git+https://github.com/openai/CLIP.git@main#egg=clip
|
||||
- git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion
|
||||
- git+https://github.com/invoke-ai/clipseg.git@relaxed-python-requirement#egg=clipseg
|
||||
- -e .
|
@ -13,33 +13,33 @@ dependencies:
|
||||
- cudatoolkit=11.6
|
||||
- pip:
|
||||
- albumentations==0.4.3
|
||||
- opencv-python==4.5.5.64
|
||||
- pudb==2019.2
|
||||
- imageio==2.9.0
|
||||
- imageio-ffmpeg==0.4.2
|
||||
- pytorch-lightning==1.7.7
|
||||
- omegaconf==2.2.3
|
||||
- test-tube>=0.7.5
|
||||
- streamlit==1.12.0
|
||||
- send2trash==1.8.0
|
||||
- pillow==9.2.0
|
||||
- einops==0.3.0
|
||||
- pyreadline3
|
||||
- torch-fidelity==0.3.0
|
||||
- transformers==4.21.3
|
||||
- diffusers==0.6.0
|
||||
- torchmetrics==0.7.0
|
||||
- flask==2.1.3
|
||||
- flask_socketio==5.3.0
|
||||
- flask_cors==3.0.10
|
||||
- dependency_injector==4.40.0
|
||||
- diffusers==0.6.0
|
||||
- einops==0.3.0
|
||||
- eventlet
|
||||
- flask==2.1.3
|
||||
- flask_cors==3.0.10
|
||||
- flask_socketio==5.3.0
|
||||
- getpass_asterisk
|
||||
- gfpgan
|
||||
- imageio-ffmpeg==0.4.2
|
||||
- imageio==2.9.0
|
||||
- kornia==0.6.0
|
||||
- omegaconf==2.2.3
|
||||
- opencv-python==4.5.5.64
|
||||
- pillow==9.2.0
|
||||
- pudb==2019.2
|
||||
- pyreadline3
|
||||
- pytorch-lightning==1.7.7
|
||||
- realesrgan
|
||||
- send2trash==1.8.0
|
||||
- streamlit==1.12.0
|
||||
- taming-transformers-rom1504
|
||||
- test-tube>=0.7.5
|
||||
- torch-fidelity==0.3.0
|
||||
- torchmetrics==0.7.0
|
||||
- transformers==4.21.3
|
||||
- git+https://github.com/openai/CLIP.git@main#egg=clip
|
||||
- -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers
|
||||
- -e git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion
|
||||
- -e git+https://github.com/invoke-ai/Real-ESRGAN.git#egg=realesrgan
|
||||
- -e git+https://github.com/invoke-ai/GFPGAN.git#egg=gfpgan
|
||||
- -e git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg
|
||||
- git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion
|
||||
- git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg
|
||||
- -e .
|
@ -5,18 +5,9 @@ channels:
|
||||
dependencies:
|
||||
- python=3.9.13
|
||||
- pip=22.2.2
|
||||
|
||||
- pytorch=1.12.1
|
||||
- torchvision=0.13.1
|
||||
|
||||
# I suggest to keep the other deps sorted for convenience.
|
||||
# To determine what the latest versions should be, run:
|
||||
#
|
||||
# ```shell
|
||||
# sed -E 's/invokeai/invokeai-updated/;20,99s/- ([^=]+)==.+/- \1/' environment-mac.yml > environment-mac-updated.yml
|
||||
# CONDA_SUBDIR=osx-arm64 conda env create -f environment-mac-updated.yml && conda list -n invokeai-updated | awk ' {print " - " $1 "==" $2;} '
|
||||
# ```
|
||||
|
||||
- albumentations=1.2.1
|
||||
- coloredlogs=15.0.1
|
||||
- diffusers=0.6.0
|
||||
@ -38,6 +29,7 @@ dependencies:
|
||||
- pytorch-lightning=1.7.7
|
||||
- scipy=1.9.3
|
||||
- streamlit=1.12.2
|
||||
- taming-transformers-rom1504
|
||||
- sympy=1.10.1
|
||||
- tensorboard=2.10.0
|
||||
- torchmetrics=0.10.1
|
||||
@ -55,11 +47,10 @@ dependencies:
|
||||
- dependency_injector==4.40.0
|
||||
- realesrgan==0.2.5.0
|
||||
- test-tube==0.7.5
|
||||
- -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers
|
||||
- -e git+https://github.com/openai/CLIP.git@main#egg=clip
|
||||
- -e git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion
|
||||
- -e git+https://github.com/TencentARC/GFPGAN.git#egg=gfpgan
|
||||
- -e git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg
|
||||
- git+https://github.com/openai/CLIP.git@main#egg=clip
|
||||
- git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion
|
||||
- git+https://github.com/TencentARC/GFPGAN.git#egg=gfpgan
|
||||
- git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg
|
||||
- -e .
|
||||
variables:
|
||||
PYTORCH_ENABLE_MPS_FALLBACK: 1
|
45
environments-and-requirements/environment-win-cuda.yml
Normal file
@ -0,0 +1,45 @@
|
||||
name: invokeai
|
||||
channels:
|
||||
- pytorch
|
||||
- conda-forge
|
||||
- defaults
|
||||
dependencies:
|
||||
- python>=3.9
|
||||
- pip=22.2.2
|
||||
- numpy=1.23.3
|
||||
- torchvision=0.13.1
|
||||
- torchaudio=0.12.1
|
||||
- pytorch=1.12.1
|
||||
- cudatoolkit=11.6
|
||||
- pip:
|
||||
- albumentations==0.4.3
|
||||
- dependency_injector==4.40.0
|
||||
- diffusers==0.6.0
|
||||
- einops==0.3.0
|
||||
- eventlet
|
||||
- flask==2.1.3
|
||||
- flask_cors==3.0.10
|
||||
- flask_socketio==5.3.0
|
||||
- getpass_asterisk
|
||||
- gfpgan
|
||||
- imageio-ffmpeg==0.4.2
|
||||
- imageio==2.9.0
|
||||
- kornia==0.6.0
|
||||
- omegaconf==2.2.3
|
||||
- opencv-python==4.5.5.64
|
||||
- pillow==9.2.0
|
||||
- pudb==2019.2
|
||||
- pyreadline3
|
||||
- pytorch-lightning==1.7.7
|
||||
- realesrgan
|
||||
- send2trash==1.8.0
|
||||
- streamlit==1.12.0
|
||||
- taming-transformers-rom1504
|
||||
- test-tube>=0.7.5
|
||||
- torch-fidelity==0.3.0
|
||||
- torchmetrics==0.7.0
|
||||
- transformers==4.21.3
|
||||
- git+https://github.com/openai/CLIP.git@main#egg=clip
|
||||
- git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion
|
||||
- git+https://github.com/invoke-ai/clipseg.git@relaxed-python-requirement#egg=clipseg
|
||||
- -e .
|
@ -1,41 +1,38 @@
|
||||
--prefer-binary
|
||||
|
||||
albumentations
|
||||
einops
|
||||
getpass_asterisk
|
||||
huggingface-hub
|
||||
imageio-ffmpeg
|
||||
imageio
|
||||
kornia
|
||||
# pip will resolve the version which matches torch
|
||||
albumentations
|
||||
dependency_injector==4.40.0
|
||||
diffusers
|
||||
einops
|
||||
eventlet
|
||||
flask==2.1.3
|
||||
flask_cors==3.0.10
|
||||
flask_socketio==5.3.0
|
||||
flaskwebgui==0.3.7
|
||||
getpass_asterisk
|
||||
gfpgan
|
||||
huggingface-hub
|
||||
imageio
|
||||
imageio-ffmpeg
|
||||
kornia
|
||||
numpy
|
||||
omegaconf
|
||||
opencv-python
|
||||
pillow
|
||||
pip>=22
|
||||
pudb
|
||||
pytorch-lightning==1.7.7
|
||||
scikit-image>=0.19
|
||||
streamlit
|
||||
pyreadline3
|
||||
# "CompVis/taming-transformers" IS NOT INSTALLABLE
|
||||
# This is a drop-in replacement
|
||||
pytorch-lightning==1.7.7
|
||||
realesrgan
|
||||
scikit-image>=0.19
|
||||
send2trash
|
||||
streamlit
|
||||
taming-transformers-rom1504
|
||||
test-tube
|
||||
torch-fidelity
|
||||
torchmetrics
|
||||
transformers==4.21.*
|
||||
flask==2.1.3
|
||||
flask_socketio==5.3.0
|
||||
flask_cors==3.0.10
|
||||
flaskwebgui==0.3.7
|
||||
send2trash
|
||||
dependency_injector==4.40.0
|
||||
eventlet
|
||||
realesrgan
|
||||
diffusers
|
||||
git+https://github.com/openai/CLIP.git@main#egg=clip
|
||||
git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k-diffusion
|
||||
git+https://github.com/invoke-ai/Real-ESRGAN.git#egg=realesrgan
|
||||
git+https://github.com/invoke-ai/GFPGAN.git#egg=gfpgan
|
||||
-e git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg
|
||||
git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg
|
@ -1,4 +1,4 @@
|
||||
-r requirements.txt
|
||||
-r environments-and-requirements/requirements-base.txt
|
||||
|
||||
# Get hardware-appropriate torch/torchvision
|
||||
--extra-index-url https://download.pytorch.org/whl/rocm5.1.1 --trusted-host https://download.pytorch.org
|
3
environments-and-requirements/requirements-lin-arm64.txt
Normal file
@ -0,0 +1,3 @@
|
||||
--pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu
|
||||
-r environments-and-requirements/requirements-base.txt
|
||||
-e .
|
2
environments-and-requirements/requirements-lin-cuda.txt
Normal file
@ -0,0 +1,2 @@
|
||||
-r environments-and-requirements/requirements-base.txt
|
||||
-e .
|
@ -1,4 +1,4 @@
|
||||
-r requirements.txt
|
||||
-r environments-and-requirements/requirements-base.txt
|
||||
|
||||
protobuf==3.19.6
|
||||
torch<1.13.0
|
@ -1,4 +1,4 @@
|
||||
-r requirements.txt
|
||||
-r environments-and-requirements/requirements-base.txt
|
||||
|
||||
# Get hardware-appropriate torch/torchvision
|
||||
--extra-index-url https://download.pytorch.org/whl/cu116 --trusted-host https://download.pytorch.org
|
@ -802,6 +802,10 @@ class Generate:
|
||||
|
||||
# the model cache does the loading and offloading
|
||||
cache = self.model_cache
|
||||
if not cache.valid_model(model_name):
|
||||
print(f'** "{model_name}" is not a known model name. Please check your models.yaml file')
|
||||
return self.model
|
||||
|
||||
cache.print_vram_usage()
|
||||
|
||||
# have to get rid of all references to model in order
|
||||
|
@ -554,13 +554,13 @@ class Args(object):
|
||||
postprocessing_group.add_argument(
|
||||
'--gfpgan_model_path',
|
||||
type=str,
|
||||
default='experiments/pretrained_models/GFPGANv1.4.pth',
|
||||
default='./GFPGANv1.4.pth',
|
||||
help='Indicates the path to the GFPGAN model, relative to --gfpgan_dir.',
|
||||
)
|
||||
postprocessing_group.add_argument(
|
||||
'--gfpgan_dir',
|
||||
type=str,
|
||||
default='./src/gfpgan',
|
||||
default='./models/gfpgan',
|
||||
help='Indicates the directory containing the GFPGAN code.',
|
||||
)
|
||||
web_server_group.add_argument(
|
||||
|
@ -47,7 +47,6 @@ def get_uc_and_c_and_ec(prompt_string_uncleaned, model, log_tokens=False, skip_n
|
||||
parsed_prompt = pp.parse_conjunction(prompt_string_cleaned).prompts[0]
|
||||
|
||||
parsed_negative_prompt: FlattenedPrompt = pp.parse_conjunction(unconditioned_words).prompts[0]
|
||||
print(f">> Parsed prompt to {parsed_prompt}")
|
||||
|
||||
conditioning = None
|
||||
cac_args:CrossAttentionControl.Arguments = None
|
||||
|
@ -41,15 +41,22 @@ class ModelCache(object):
|
||||
self.stack = [] # this is an LRU FIFO
|
||||
self.current_model = None
|
||||
|
||||
def valid_model(self, model_name:str)->bool:
    '''
    Report whether model_name is one of the entries of
    this cache's config.
    '''
    known_models = self.config
    return model_name in known_models
|
||||
|
||||
def get_model(self, model_name:str):
|
||||
'''
|
||||
Given a model named identified in models.yaml, return
|
||||
the model object. If in RAM will load into GPU VRAM.
|
||||
If on disk, will load from there.
|
||||
'''
|
||||
if model_name not in self.config:
|
||||
if not self.valid_model(model_name):
|
||||
print(f'** "{model_name}" is not a known model name. Please check your models.yaml file')
|
||||
return None
|
||||
return self.current_model
|
||||
|
||||
if self.current_model != model_name:
|
||||
if model_name not in self.models: # make room for a new one
|
||||
|
@ -10,8 +10,9 @@ from PIL import Image
|
||||
class GFPGAN():
|
||||
def __init__(
|
||||
self,
|
||||
gfpgan_dir='src/gfpgan',
|
||||
gfpgan_model_path='experiments/pretrained_models/GFPGANv1.4.pth') -> None:
|
||||
gfpgan_dir='models/gfpgan',
|
||||
gfpgan_model_path='GFPGANv1.4.pth'
|
||||
) -> None:
|
||||
|
||||
self.model_path = os.path.join(gfpgan_dir, gfpgan_model_path)
|
||||
self.gfpgan_model_exists = os.path.isfile(self.model_path)
|
||||
@ -74,6 +75,7 @@ class GFPGAN():
|
||||
image = image.resize(res.size)
|
||||
res = Image.blend(image, res, strength)
|
||||
|
||||
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.empty_cache()
|
||||
self.gfpgan = None
|
||||
|
@ -35,8 +35,8 @@ from PIL import Image, ImageOps
|
||||
from torchvision import transforms
|
||||
|
||||
CLIP_VERSION = 'ViT-B/16'
|
||||
CLIPSEG_WEIGHTS = 'src/clipseg/weights/rd64-uni.pth'
|
||||
CLIPSEG_WEIGHTS_REFINED = 'src/clipseg/weights/rd64-uni-refined.pth'
|
||||
CLIPSEG_WEIGHTS = 'models/clipseg/clipseg_weights/rd64-uni.pth'
|
||||
CLIPSEG_WEIGHTS_REFINED = 'models/clipseg/clipseg_weights/rd64-uni-refined.pth'
|
||||
CLIPSEG_SIZE = 352
|
||||
|
||||
class SegmentedGrayscale(object):
|
||||
|
@ -1,10 +1,13 @@
|
||||
from enum import Enum
|
||||
import enum
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
|
||||
# adapted from bloc97's CrossAttentionControl colab
|
||||
# https://github.com/bloc97/CrossAttentionControl
|
||||
|
||||
|
||||
|
||||
class CrossAttentionControl:
|
||||
|
||||
class Arguments:
|
||||
@ -27,7 +30,14 @@ class CrossAttentionControl:
|
||||
print('warning: cross-attention control options are not working properly for >1 edit')
|
||||
self.edit_options = non_none_edit_options[0]
|
||||
|
||||
|
||||
class Context:
|
||||
|
||||
class Action(enum.Enum):
    """Pending request for a group of attention modules (self or tokens)."""
    # Nothing requested.
    NONE = 0
    # Save the attention maps. The previous `1,` had a trailing comma, which
    # silently made this member's value the tuple (1,) rather than the int 1.
    SAVE = 1
    # Apply previously saved attention maps.
    APPLY = 2
|
||||
|
||||
def __init__(self, arguments: 'CrossAttentionControl.Arguments', step_count: int):
|
||||
"""
|
||||
:param arguments: Arguments for the cross-attention control process
|
||||
@ -36,14 +46,124 @@ class CrossAttentionControl:
|
||||
self.arguments = arguments
|
||||
self.step_count = step_count
|
||||
|
||||
self.self_cross_attention_module_identifiers = []
|
||||
self.tokens_cross_attention_module_identifiers = []
|
||||
|
||||
self.saved_cross_attention_maps = {}
|
||||
|
||||
self.clear_requests(cleanup=True)
|
||||
|
||||
def register_cross_attention_modules(self, model):
    """
    Remember the names of the model's attention modules, kept in two lists:
    one for SELF attention and one for TOKENS attention.

    :param model: The model whose attention modules should be looked up.
    """
    attention_types = CrossAttentionControl.CrossAttentionType

    self.self_cross_attention_module_identifiers.extend(
        module_name for module_name, _ in
        CrossAttentionControl.get_attention_modules(model, attention_types.SELF))

    self.tokens_cross_attention_module_identifiers.extend(
        module_name for module_name, _ in
        CrossAttentionControl.get_attention_modules(model, attention_types.TOKENS))
|
||||
|
||||
def request_save_attention_maps(self, cross_attention_type: 'CrossAttentionControl.CrossAttentionType'):
    """
    Flag the given attention type so that its maps get saved.

    :param cross_attention_type: SELF sets the self-attention action; any other value sets the tokens action.
    """
    if cross_attention_type != CrossAttentionControl.CrossAttentionType.SELF:
        self.tokens_cross_attention_action = CrossAttentionControl.Context.Action.SAVE
        return
    self.self_cross_attention_action = CrossAttentionControl.Context.Action.SAVE
|
||||
|
||||
def request_apply_saved_attention_maps(self, cross_attention_type: 'CrossAttentionControl.CrossAttentionType'):
    """
    Flag the given attention type so that its saved maps get applied.

    :param cross_attention_type: SELF sets the self-attention action; any other value sets the tokens action.
    """
    if cross_attention_type != CrossAttentionControl.CrossAttentionType.SELF:
        self.tokens_cross_attention_action = CrossAttentionControl.Context.Action.APPLY
        return
    self.self_cross_attention_action = CrossAttentionControl.Context.Action.APPLY
|
||||
|
||||
def is_tokens_cross_attention(self, module_identifier) -> bool:
    """Tell whether module_identifier was registered as a tokens cross-attention module."""
    registered_token_modules = self.tokens_cross_attention_module_identifiers
    return module_identifier in registered_token_modules
|
||||
|
||||
def get_should_save_maps(self, module_identifier: str) -> bool:
    """
    Tell whether a SAVE request is pending for the group the module belongs to.

    :param module_identifier: Name of the attention module being queried.
    :return: True if the module's group (self or tokens) currently has the SAVE action; False otherwise,
             including when the module is not registered in either group.
    """
    if module_identifier in self.self_cross_attention_module_identifiers:
        pending_action = self.self_cross_attention_action
    elif module_identifier in self.tokens_cross_attention_module_identifiers:
        pending_action = self.tokens_cross_attention_action
    else:
        return False
    return pending_action == CrossAttentionControl.Context.Action.SAVE
|
||||
|
||||
def get_should_apply_saved_maps(self, module_identifier: str) -> bool:
    """
    Tell whether an APPLY request is pending for the group the module belongs to.

    :param module_identifier: Name of the attention module being queried.
    :return: True if the module's group (self or tokens) currently has the APPLY action; False otherwise,
             including when the module is not registered in either group.
    """
    if module_identifier in self.self_cross_attention_module_identifiers:
        pending_action = self.self_cross_attention_action
    elif module_identifier in self.tokens_cross_attention_module_identifiers:
        pending_action = self.tokens_cross_attention_action
    else:
        return False
    return pending_action == CrossAttentionControl.Context.Action.APPLY
|
||||
|
||||
def get_active_cross_attention_control_types_for_step(self, percent_through:float=None)\
        -> list['CrossAttentionControl.CrossAttentionType']:
    """
    Decide which kinds of cross-attention control are active for the current step.

    :param percent_through: Progress through the step sequence (0.0=pure noise, 1.0=completely denoised image),
                            expected in the range 0.0..<1.0. None means "no progress information".
    :return: The attention types to control on this step, possibly []. When percent_through is None both
             SELF and TOKENS are returned.
    """
    if percent_through is None:
        return [CrossAttentionControl.CrossAttentionType.SELF, CrossAttentionControl.CrossAttentionType.TOKENS]

    edit_options = self.arguments.edit_options
    active_types = []
    # each type is active inside its half-open [start, end) window
    if edit_options['s_start'] <= percent_through < edit_options['s_end']:
        active_types.append(CrossAttentionControl.CrossAttentionType.SELF)
    if edit_options['t_start'] <= percent_through < edit_options['t_end']:
        active_types.append(CrossAttentionControl.CrossAttentionType.TOKENS)
    return active_types
|
||||
|
||||
def save_slice(self, identifier: str, slice: torch.Tensor, dim: Optional[int], offset: int,
               slice_size: Optional[int]):
    """
    Store one attention slice for a module.

    The first slice stored for an identifier also records `dim` and `slice_size`;
    later slices for the same identifier are only added to its 'slices' dict.

    :param identifier: Name of the module the slice belongs to.
    :param slice: The slice to store.
    :param dim: Recorded alongside the first slice of an identifier.
    :param offset: Key of the slice; a falsy offset (None or 0) is stored under key 0.
    :param slice_size: Recorded alongside the first slice of an identifier.
    """
    slice_key = offset or 0
    saved_maps = self.saved_cross_attention_maps

    if identifier in saved_maps:
        saved_maps[identifier]['slices'][slice_key] = slice
        return

    saved_maps[identifier] = {
        'dim': dim,
        'slice_size': slice_size,
        'slices': {slice_key: slice},
    }
|
||||
|
||||
def get_slice(self, identifier: str, requested_dim: Optional[int], requested_offset: int, slice_size: int):
    """
    Fetch a previously saved attention slice for a module.

    :param identifier: Name of the module whose saved attention is wanted.
    :param requested_dim: Dimension the caller is slicing along, or None for the whole, unsliced attention.
    :param requested_offset: Offset of the requested slice.
    :param slice_size: Size of the requested slice.
    :return: The saved slice, or a slice cut out of the saved whole attention.
    :raises KeyError: if nothing was saved for `identifier`, or no slice was saved at `requested_offset`.
    :raises RuntimeError: if the saved slicing (dim / slice_size) cannot satisfy the request.
    """
    saved_attention_dict = self.saved_cross_attention_maps[identifier]
    saved_dim = saved_attention_dict['dim']

    if requested_dim is None:
        # caller wants the whole thing: only possible if it was saved whole
        if saved_dim is not None:
            raise RuntimeError(f"dim mismatch: expected dim=None, have {saved_dim}")
        return saved_attention_dict['slices'][0]

    if saved_dim == requested_dim:
        # saved with the same slicing dimension: slice sizes must agree too
        if slice_size != saved_attention_dict['slice_size']:
            raise RuntimeError(
                f"slice_size mismatch: expected slice_size={slice_size}, have {saved_attention_dict['slice_size']}")
        return saved_attention_dict['slices'][requested_offset]

    if saved_dim is None:
        # saved whole (under key 0): cut the requested slice out of it
        whole_saved_attention = saved_attention_dict['slices'][0]
        if requested_dim == 0:
            return whole_saved_attention[requested_offset:requested_offset + slice_size]
        elif requested_dim == 1:
            return whole_saved_attention[:, requested_offset:requested_offset + slice_size]

    raise RuntimeError(f"Cannot convert dim {saved_dim} to requested dim {requested_dim}")
|
||||
|
||||
def get_slicing_strategy(self, identifier: str) -> tuple[Optional[int], Optional[int]]:
    """
    Look up how the saved attention for a module was sliced.

    :param identifier: Name of the module to look up.
    :return: A `(dim, slice_size)` pair as recorded when the attention was saved, or `(None, None)` if
             nothing is saved for `identifier`. The result is always a 2-tuple, never a bare None.
    """
    saved_attention = self.saved_cross_attention_maps.get(identifier)
    if saved_attention is None:
        return None, None
    return saved_attention['dim'], saved_attention['slice_size']
|
||||
|
||||
def clear_requests(self, cleanup=True):
    """
    Reset both pending actions (tokens and self) to NONE.

    :param cleanup: If truthy, also discard all saved attention maps.
    """
    self.tokens_cross_attention_action = CrossAttentionControl.Context.Action.NONE
    self.self_cross_attention_action = CrossAttentionControl.Context.Action.NONE
    if not cleanup:
        return
    self.saved_cross_attention_maps = {}
|
||||
|
||||
def offload_saved_attention_slices_to_cpu(self):
    """
    Replace every saved attention slice with the result of its `.to('cpu')`.

    The CPU copies are stored back into each entry's 'slices' dict under their
    original offsets. (Previously they were written to the entry dict itself,
    which left the saved slices untouched and added stray offset keys next to
    'dim', 'slice_size' and 'slices'.)
    """
    for map_dict in self.saved_cross_attention_maps.values():
        # build a fresh dict rather than mutating the one being iterated
        map_dict['slices'] = {
            offset: saved_slice.to('cpu')
            for offset, saved_slice in map_dict['slices'].items()
        }
|
||||
|
||||
@classmethod
def remove_cross_attention_control(cls, model):
    """Undo cross-attention control on `model` by delegating to `remove_attention_function`."""
    remove = cls.remove_attention_function
    remove(model)
|
||||
|
||||
@classmethod
|
||||
def setup_cross_attention_control(cls, model,
|
||||
cross_attention_control_args: Arguments
|
||||
):
|
||||
def setup_cross_attention_control(cls, model, context: Context):
|
||||
"""
|
||||
Inject attention parameters and functions into the passed in model to enable cross attention editing.
|
||||
|
||||
@ -53,7 +173,7 @@ class CrossAttentionControl:
|
||||
"""
|
||||
|
||||
# adapted from init_attention_edit
|
||||
device = cross_attention_control_args.edited_conditioning.device
|
||||
device = context.arguments.edited_conditioning.device
|
||||
|
||||
# urgh. should this be hardcoded?
|
||||
max_length = 77
|
||||
@ -61,141 +181,82 @@ class CrossAttentionControl:
|
||||
mask = torch.zeros(max_length)
|
||||
indices_target = torch.arange(max_length, dtype=torch.long)
|
||||
indices = torch.zeros(max_length, dtype=torch.long)
|
||||
for name, a0, a1, b0, b1 in cross_attention_control_args.edit_opcodes:
|
||||
for name, a0, a1, b0, b1 in context.arguments.edit_opcodes:
|
||||
if b0 < max_length:
|
||||
if name == "equal":# or (name == "replace" and a1 - a0 == b1 - b0):
|
||||
# these tokens have not been edited
|
||||
indices[b0:b1] = indices_target[a0:a1]
|
||||
mask[b0:b1] = 1
|
||||
|
||||
cls.inject_attention_function(model)
|
||||
|
||||
for m in cls.get_attention_modules(model, cls.CrossAttentionType.SELF):
|
||||
m.last_attn_slice_mask = None
|
||||
m.last_attn_slice_indices = None
|
||||
|
||||
for m in cls.get_attention_modules(model, cls.CrossAttentionType.TOKENS):
|
||||
m.last_attn_slice_mask = mask.to(device)
|
||||
m.last_attn_slice_indices = indices.to(device)
|
||||
context.register_cross_attention_modules(model)
|
||||
context.cross_attention_mask = mask.to(device)
|
||||
context.cross_attention_index_map = indices.to(device)
|
||||
cls.inject_attention_function(model, context)
|
||||
|
||||
|
||||
class CrossAttentionType(Enum):
|
||||
class CrossAttentionType(enum.Enum):
|
||||
SELF = 1
|
||||
TOKENS = 2
|
||||
|
||||
@classmethod
|
||||
def get_active_cross_attention_control_types_for_step(cls, context: 'CrossAttentionControl.Context', percent_through:float=None)\
|
||||
-> list['CrossAttentionControl.CrossAttentionType']:
|
||||
"""
|
||||
Should cross-attention control be applied on the given step?
|
||||
:param percent_through: How far through the step sequence are we (0.0=pure noise, 1.0=completely denoised image). Expected range 0.0..<1.0.
|
||||
:return: A list of attention types that cross-attention control should be performed for on the given step. May be [].
|
||||
"""
|
||||
if percent_through is None:
|
||||
return [cls.CrossAttentionType.SELF, cls.CrossAttentionType.TOKENS]
|
||||
|
||||
opts = context.arguments.edit_options
|
||||
to_control = []
|
||||
if opts['s_start'] <= percent_through and percent_through < opts['s_end']:
|
||||
to_control.append(cls.CrossAttentionType.SELF)
|
||||
if opts['t_start'] <= percent_through and percent_through < opts['t_end']:
|
||||
to_control.append(cls.CrossAttentionType.TOKENS)
|
||||
return to_control
|
||||
|
||||
|
||||
@classmethod
|
||||
def get_attention_modules(cls, model, which: CrossAttentionType):
|
||||
which_attn = "attn1" if which is cls.CrossAttentionType.SELF else "attn2"
|
||||
return [module for name, module in model.named_modules() if
|
||||
return [(name,module) for name, module in model.named_modules() if
|
||||
type(module).__name__ == "CrossAttention" and which_attn in name]
|
||||
|
||||
@classmethod
|
||||
def clear_requests(cls, model, clear_attn_slice=True):
|
||||
self_attention_modules = cls.get_attention_modules(model, cls.CrossAttentionType.SELF)
|
||||
tokens_attention_modules = cls.get_attention_modules(model, cls.CrossAttentionType.TOKENS)
|
||||
for m in self_attention_modules+tokens_attention_modules:
|
||||
m.save_last_attn_slice = False
|
||||
m.use_last_attn_slice = False
|
||||
if clear_attn_slice:
|
||||
m.last_attn_slice = None
|
||||
|
||||
@classmethod
|
||||
def request_save_attention_maps(cls, model, cross_attention_type: CrossAttentionType):
|
||||
modules = cls.get_attention_modules(model, cross_attention_type)
|
||||
for m in modules:
|
||||
# clear out the saved slice in case the outermost dim changes
|
||||
m.last_attn_slice = None
|
||||
m.save_last_attn_slice = True
|
||||
|
||||
@classmethod
|
||||
def request_apply_saved_attention_maps(cls, model, cross_attention_type: CrossAttentionType):
|
||||
modules = cls.get_attention_modules(model, cross_attention_type)
|
||||
for m in modules:
|
||||
m.use_last_attn_slice = True
|
||||
|
||||
|
||||
|
||||
@classmethod
|
||||
def inject_attention_function(cls, unet):
|
||||
def inject_attention_function(cls, unet, context: 'CrossAttentionControl.Context'):
|
||||
# ORIGINAL SOURCE CODE: https://github.com/huggingface/diffusers/blob/91ddd2a25b848df0fa1262d4f1cd98c7ccb87750/src/diffusers/models/attention.py#L276
|
||||
|
||||
def attention_slice_wrangler(self, attention_scores, suggested_attention_slice, dim, offset, slice_size):
|
||||
def attention_slice_wrangler(module, suggested_attention_slice:torch.Tensor, dim, offset, slice_size):
|
||||
|
||||
#print("in wrangler with suggested_attention_slice shape", suggested_attention_slice.shape, "dim", dim)
|
||||
#memory_usage = suggested_attention_slice.element_size() * suggested_attention_slice.nelement()
|
||||
|
||||
attn_slice = suggested_attention_slice
|
||||
if dim is not None:
|
||||
start = offset
|
||||
end = start+slice_size
|
||||
#print(f"in wrangler, sliced dim {dim} {start}-{end}, use_last_attn_slice is {self.use_last_attn_slice}, save_last_attn_slice is {self.save_last_attn_slice}")
|
||||
#else:
|
||||
# print(f"in wrangler, whole, use_last_attn_slice is {self.use_last_attn_slice}, save_last_attn_slice is {self.save_last_attn_slice}")
|
||||
attention_slice = suggested_attention_slice
|
||||
|
||||
if self.use_last_attn_slice:
|
||||
if dim is None:
|
||||
last_attn_slice = self.last_attn_slice
|
||||
# print("took whole slice of shape", attn_slice.shape, "from complete shape", self.last_attn_slice.shape)
|
||||
if context.get_should_save_maps(module.identifier):
|
||||
#print(module.identifier, "saving suggested_attention_slice of shape",
|
||||
# suggested_attention_slice.shape, "dim", dim, "offset", offset)
|
||||
slice_to_save = attention_slice.to('cpu') if dim is not None else attention_slice
|
||||
context.save_slice(module.identifier, slice_to_save, dim=dim, offset=offset, slice_size=slice_size)
|
||||
elif context.get_should_apply_saved_maps(module.identifier):
|
||||
#print(module.identifier, "applying saved attention slice for dim", dim, "offset", offset)
|
||||
saved_attention_slice = context.get_slice(module.identifier, dim, offset, slice_size)
|
||||
|
||||
# slice may have been offloaded to CPU
|
||||
saved_attention_slice = saved_attention_slice.to(suggested_attention_slice.device)
|
||||
|
||||
if context.is_tokens_cross_attention(module.identifier):
|
||||
index_map = context.cross_attention_index_map
|
||||
remapped_saved_attention_slice = torch.index_select(saved_attention_slice, -1, index_map)
|
||||
this_attention_slice = suggested_attention_slice
|
||||
|
||||
mask = context.cross_attention_mask
|
||||
saved_mask = mask
|
||||
this_mask = 1 - mask
|
||||
attention_slice = remapped_saved_attention_slice * saved_mask + \
|
||||
this_attention_slice * this_mask
|
||||
else:
|
||||
last_attn_slice = self.last_attn_slice[offset]
|
||||
|
||||
if self.last_attn_slice_mask is None:
|
||||
# just use everything
|
||||
attn_slice = last_attn_slice
|
||||
else:
|
||||
last_attn_slice_mask = self.last_attn_slice_mask
|
||||
remapped_last_attn_slice = torch.index_select(last_attn_slice, -1, self.last_attn_slice_indices)
|
||||
attention_slice = saved_attention_slice
|
||||
|
||||
this_attn_slice = attn_slice
|
||||
this_attn_slice_mask = 1 - last_attn_slice_mask
|
||||
attn_slice = this_attn_slice * this_attn_slice_mask + \
|
||||
remapped_last_attn_slice * last_attn_slice_mask
|
||||
|
||||
if self.save_last_attn_slice:
|
||||
if dim is None:
|
||||
self.last_attn_slice = attn_slice
|
||||
else:
|
||||
if self.last_attn_slice is None:
|
||||
self.last_attn_slice = { offset: attn_slice }
|
||||
else:
|
||||
self.last_attn_slice[offset] = attn_slice
|
||||
|
||||
return attn_slice
|
||||
return attention_slice
|
||||
|
||||
for name, module in unet.named_modules():
|
||||
module_name = type(module).__name__
|
||||
if module_name == "CrossAttention":
|
||||
module.last_attn_slice = None
|
||||
module.last_attn_slice_indices = None
|
||||
module.last_attn_slice_mask = None
|
||||
module.use_last_attn_weights = False
|
||||
module.use_last_attn_slice = False
|
||||
module.save_last_attn_slice = False
|
||||
module.identifier = name
|
||||
module.set_attention_slice_wrangler(attention_slice_wrangler)
|
||||
module.set_slicing_strategy_getter(lambda module, module_identifier=name: \
|
||||
context.get_slicing_strategy(module_identifier))
|
||||
|
||||
@classmethod
def remove_attention_function(cls, unet):
    """
    Clear the attention-slice wrangler and the slicing-strategy getter on every
    module of `unet` whose class is named "CrossAttention".
    """
    cross_attention_modules = (
        candidate for _, candidate in unet.named_modules()
        if type(candidate).__name__ == "CrossAttention"
    )
    for attention_module in cross_attention_modules:
        # passing None clears the callback
        attention_module.set_attention_slice_wrangler(None)
        attention_module.set_slicing_strategy_getter(None)
|
||||
|
||||
|
@ -1,9 +1,11 @@
|
||||
import traceback
|
||||
from math import ceil
|
||||
from typing import Callable, Optional, Union
|
||||
|
||||
import torch
|
||||
|
||||
from ldm.models.diffusion.cross_attention_control import CrossAttentionControl
|
||||
from ldm.modules.attention import get_mem_free_total
|
||||
|
||||
|
||||
class InvokeAIDiffuserComponent:
|
||||
@ -34,7 +36,7 @@ class InvokeAIDiffuserComponent:
|
||||
"""
|
||||
self.model = model
|
||||
self.model_forward_callback = model_forward_callback
|
||||
|
||||
self.cross_attention_control_context = None
|
||||
|
||||
def setup_cross_attention_control(self, conditioning: ExtraConditioningInfo, step_count: int):
|
||||
self.conditioning = conditioning
|
||||
@ -42,11 +44,7 @@ class InvokeAIDiffuserComponent:
|
||||
arguments=self.conditioning.cross_attention_control_args,
|
||||
step_count=step_count
|
||||
)
|
||||
CrossAttentionControl.setup_cross_attention_control(self.model,
|
||||
cross_attention_control_args=self.conditioning.cross_attention_control_args
|
||||
)
|
||||
#todo: refactor edited_conditioning, edit_opcodes, edit_options into a struct
|
||||
#todo: apply edit_options using step_count
|
||||
CrossAttentionControl.setup_cross_attention_control(self.model, self.cross_attention_control_context)
|
||||
|
||||
def remove_cross_attention_control(self):
|
||||
self.conditioning = None
|
||||
@ -54,6 +52,7 @@ class InvokeAIDiffuserComponent:
|
||||
CrossAttentionControl.remove_cross_attention_control(self.model)
|
||||
|
||||
|
||||
|
||||
def do_diffusion_step(self, x: torch.Tensor, sigma: torch.Tensor,
|
||||
unconditioning: Union[torch.Tensor,dict],
|
||||
conditioning: Union[torch.Tensor,dict],
|
||||
@ -70,12 +69,12 @@ class InvokeAIDiffuserComponent:
|
||||
:return: the new latents after applying the model to x using unscaled unconditioning and CFG-scaled conditioning.
|
||||
"""
|
||||
|
||||
CrossAttentionControl.clear_requests(self.model)
|
||||
|
||||
cross_attention_control_types_to_do = []
|
||||
context: CrossAttentionControl.Context = self.cross_attention_control_context
|
||||
if self.cross_attention_control_context is not None:
|
||||
percent_through = self.estimate_percent_through(step_index, sigma)
|
||||
cross_attention_control_types_to_do = CrossAttentionControl.get_active_cross_attention_control_types_for_step(self.cross_attention_control_context, percent_through)
|
||||
cross_attention_control_types_to_do = context.get_active_cross_attention_control_types_for_step(percent_through)
|
||||
|
||||
wants_cross_attention_control = (len(cross_attention_control_types_to_do) > 0)
|
||||
wants_hybrid_conditioning = isinstance(conditioning, dict)
|
||||
@ -124,7 +123,7 @@ class InvokeAIDiffuserComponent:
|
||||
return unconditioned_next_x, conditioned_next_x
|
||||
|
||||
|
||||
def apply_cross_attention_controlled_conditioning(self, x, sigma, unconditioning, conditioning, cross_attention_control_types_to_do):
|
||||
def apply_cross_attention_controlled_conditioning(self, x:torch.Tensor, sigma, unconditioning, conditioning, cross_attention_control_types_to_do):
|
||||
# print('pct', percent_through, ': doing cross attention control on', cross_attention_control_types_to_do)
|
||||
# slower non-batched path (20% slower on mac MPS)
|
||||
# We are only interested in using attention maps for conditioned_next_x, but batching them with generation of
|
||||
@ -134,32 +133,32 @@ class InvokeAIDiffuserComponent:
|
||||
# representing batched uncond + cond, but then when it comes to applying the saved attention, the
|
||||
# wrangler gets an attention tensor which only has shape[0]=8, representing just self.edited_conditionings.)
|
||||
# todo: give CrossAttentionControl's `wrangler` function more info so it can work with a batched call as well.
|
||||
context:CrossAttentionControl.Context = self.cross_attention_control_context
|
||||
|
||||
try:
|
||||
unconditioned_next_x = self.model_forward_callback(x, sigma, unconditioning)
|
||||
|
||||
# process x using the original prompt, saving the attention maps
|
||||
for type in cross_attention_control_types_to_do:
|
||||
CrossAttentionControl.request_save_attention_maps(self.model, type)
|
||||
#print("saving attention maps for", cross_attention_control_types_to_do)
|
||||
for ca_type in cross_attention_control_types_to_do:
|
||||
context.request_save_attention_maps(ca_type)
|
||||
_ = self.model_forward_callback(x, sigma, conditioning)
|
||||
CrossAttentionControl.clear_requests(self.model, clear_attn_slice=False)
|
||||
context.clear_requests(cleanup=False)
|
||||
|
||||
# process x again, using the saved attention maps to control where self.edited_conditioning will be applied
|
||||
for type in cross_attention_control_types_to_do:
|
||||
CrossAttentionControl.request_apply_saved_attention_maps(self.model, type)
|
||||
#print("applying saved attention maps for", cross_attention_control_types_to_do)
|
||||
for ca_type in cross_attention_control_types_to_do:
|
||||
context.request_apply_saved_attention_maps(ca_type)
|
||||
edited_conditioning = self.conditioning.cross_attention_control_args.edited_conditioning
|
||||
conditioned_next_x = self.model_forward_callback(x, sigma, edited_conditioning)
|
||||
context.clear_requests(cleanup=True)
|
||||
|
||||
CrossAttentionControl.clear_requests(self.model)
|
||||
|
||||
return unconditioned_next_x, conditioned_next_x
|
||||
|
||||
except RuntimeError:
|
||||
# make sure we clean out the attention slices we're storing on the model
|
||||
# TODO don't store things on the model
|
||||
CrossAttentionControl.clear_requests(self.model)
|
||||
except:
|
||||
context.clear_requests(cleanup=True)
|
||||
raise
|
||||
|
||||
return unconditioned_next_x, conditioned_next_x
|
||||
|
||||
def estimate_percent_through(self, step_index, sigma):
|
||||
if step_index is not None and self.cross_attention_control_context is not None:
|
||||
# percent_through will never reach 1.0 (but this is intended)
|
||||
|
@ -1,6 +1,6 @@
|
||||
from inspect import isfunction
|
||||
import math
|
||||
from typing import Callable
|
||||
from typing import Callable, Optional
|
||||
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
@ -151,6 +151,17 @@ class SpatialSelfAttention(nn.Module):
|
||||
|
||||
return x+h_
|
||||
|
||||
def get_mem_free_total(device):
    """Estimate how many bytes of CUDA memory are available for new allocations.

    The estimate is the memory the CUDA driver reports as free plus the memory
    that PyTorch's caching allocator has reserved but is not actively using.

    :param device: device to query (a ``torch.device``, a device string, a CUDA
        device index, or ``None`` for the current CUDA device).
    :return: free memory in bytes, or ``None`` when CUDA is unavailable or
        `device` is not a CUDA device.
    """
    # only on cuda
    if not torch.cuda.is_available():
        return None
    # A non-CUDA device (e.g. 'cpu' or 'mps') has no CUDA allocator statistics;
    # torch.cuda.memory_stats() rejects it even when CUDA itself is available.
    if isinstance(device, (str, torch.device)) and torch.device(device).type != 'cuda':
        return None

    stats = torch.cuda.memory_stats(device)
    mem_active = stats['active_bytes.all.current']
    mem_reserved = stats['reserved_bytes.all.current']
    mem_free_cuda, _ = torch.cuda.mem_get_info(device)
    # reserved-but-inactive memory can be reused by torch without asking the driver
    mem_free_torch = mem_reserved - mem_active
    return mem_free_cuda + mem_free_torch
|
||||
|
||||
|
||||
class CrossAttention(nn.Module):
|
||||
@ -173,31 +184,43 @@ class CrossAttention(nn.Module):
|
||||
|
||||
self.mem_total_gb = psutil.virtual_memory().total // (1 << 30)
|
||||
|
||||
self.cached_mem_free_total = None
|
||||
self.attention_slice_wrangler = None
|
||||
self.slicing_strategy_getter = None
|
||||
|
||||
def set_attention_slice_wrangler(self, wrangler:Callable[[nn.Module, torch.Tensor, torch.Tensor, int, int, int], torch.Tensor]):
|
||||
def set_attention_slice_wrangler(self, wrangler: Optional[Callable[[nn.Module, torch.Tensor, int, int, int], torch.Tensor]]):
|
||||
'''
|
||||
Set custom attention calculator to be called when attention is calculated
|
||||
:param wrangler: Callback, with args (self, attention_scores, suggested_attention_slice, dim, offset, slice_size),
|
||||
:param wrangler: Callback, with args (module, suggested_attention_slice, dim, offset, slice_size),
|
||||
which returns either the suggested_attention_slice or an adjusted equivalent.
|
||||
self is the current CrossAttention module for which the callback is being invoked.
|
||||
attention_scores are the scores for attention
|
||||
suggested_attention_slice is a softmax(dim=-1) over attention_scores
|
||||
dim is -1 if the call is non-sliced, or 0 or 1 for dimension-0 or dimension-1 slicing.
|
||||
If dim is >= 0, offset and slice_size specify the slice start and length.
|
||||
`module` is the current CrossAttention module for which the callback is being invoked.
|
||||
`suggested_attention_slice` is the default-calculated attention slice
|
||||
`dim` is -1 if the attention map has not been sliced, or 0 or 1 for dimension-0 or dimension-1 slicing.
|
||||
If `dim` is >= 0, `offset` and `slice_size` specify the slice start and length.
|
||||
|
||||
Pass None to use the default attention calculation.
|
||||
:return:
|
||||
'''
|
||||
self.attention_slice_wrangler = wrangler
|
||||
|
||||
def set_slicing_strategy_getter(self, getter: Optional[Callable[[nn.Module], tuple[int,int]]]):
    '''
    Install the callback used to look up a previously chosen slicing strategy.
    :param getter: Callback invoked with this module; it returns a `(dim, slice_size)` pair.
        Pass None to remove the callback.
    :return:
    '''
    self.slicing_strategy_getter = getter
|
||||
|
||||
def cache_free_memory_count(self, device):
    '''
    Query the free memory for `device` once and remember the result on this module.
    :param device: the device passed through to `get_mem_free_total`.
    :return:
    '''
    free_total = get_mem_free_total(device)
    self.cached_mem_free_total = free_total
    print("free cuda memory: ", free_total)
|
||||
|
||||
def clear_cached_free_memory_count(self):
    '''
    Discard the remembered free-memory figure by resetting it to None.
    :return:
    '''
    self.cached_mem_free_total = None
|
||||
|
||||
def einsum_lowest_level(self, q, k, v, dim, offset, slice_size):
|
||||
# calculate attention scores
|
||||
attention_scores = einsum('b i d, b j d -> b i j', q, k)
|
||||
# calculate attenion slice by taking the best scores for each latent pixel
|
||||
# calculate attention slice by taking the best scores for each latent pixel
|
||||
default_attention_slice = attention_scores.softmax(dim=-1, dtype=attention_scores.dtype)
|
||||
if self.attention_slice_wrangler is not None:
|
||||
attention_slice = self.attention_slice_wrangler(self, attention_scores, default_attention_slice, dim, offset, slice_size)
|
||||
attention_slice_wrangler = self.attention_slice_wrangler
|
||||
if attention_slice_wrangler is not None:
|
||||
attention_slice = attention_slice_wrangler(self, default_attention_slice, dim, offset, slice_size)
|
||||
else:
|
||||
attention_slice = default_attention_slice
|
||||
|
||||
@ -240,17 +263,27 @@ class CrossAttention(nn.Module):
|
||||
return self.einsum_op_slice_dim1(q, k, v, max(q.shape[1] // div, 1))
|
||||
|
||||
def einsum_op_cuda(self, q, k, v):
|
||||
stats = torch.cuda.memory_stats(q.device)
|
||||
mem_active = stats['active_bytes.all.current']
|
||||
mem_reserved = stats['reserved_bytes.all.current']
|
||||
mem_free_cuda, _ = torch.cuda.mem_get_info(q.device)
|
||||
mem_free_torch = mem_reserved - mem_active
|
||||
mem_free_total = mem_free_cuda + mem_free_torch
|
||||
# check if we already have a slicing strategy (this should only happen during cross-attention controlled generation)
|
||||
slicing_strategy_getter = self.slicing_strategy_getter
|
||||
if slicing_strategy_getter is not None:
|
||||
(dim, slice_size) = slicing_strategy_getter(self)
|
||||
if dim is not None:
|
||||
# print("using saved slicing strategy with dim", dim, "slice size", slice_size)
|
||||
if dim == 0:
|
||||
return self.einsum_op_slice_dim0(q, k, v, slice_size)
|
||||
elif dim == 1:
|
||||
return self.einsum_op_slice_dim1(q, k, v, slice_size)
|
||||
|
||||
# fallback for when there is no saved strategy, or saved strategy does not slice
|
||||
mem_free_total = self.cached_mem_free_total or get_mem_free_total(q.device)
|
||||
# Divide factor of safety as there's copying and fragmentation
|
||||
return self.einsum_op_tensor_mem(q, k, v, mem_free_total / 3.3 / (1 << 20))
|
||||
|
||||
|
||||
def get_attention_mem_efficient(self, q, k, v):
|
||||
if q.device.type == 'cuda':
|
||||
torch.cuda.empty_cache()
|
||||
#print("in get_attention_mem_efficient with q shape", q.shape, ", k shape", k.shape, ", free memory is", get_mem_free_total(q.device))
|
||||
return self.einsum_op_cuda(q, k, v)
|
||||
|
||||
if q.device.type == 'mps':
|
||||
|
@ -65,10 +65,8 @@ def make_ddim_timesteps(
|
||||
if ddim_discr_method == 'uniform':
|
||||
c = num_ddpm_timesteps // num_ddim_timesteps
|
||||
if c < 1:
|
||||
c = 1
|
||||
|
||||
# remove 1 final step to prevent index out of bound error
|
||||
ddim_timesteps = np.asarray(list(range(0, num_ddpm_timesteps, c)))[:-1]
|
||||
c = 1
|
||||
ddim_timesteps = (np.arange(0, num_ddim_timesteps) * c).astype(int)
|
||||
elif ddim_discr_method == 'quad':
|
||||
ddim_timesteps = (
|
||||
(
|
||||
@ -86,6 +84,7 @@ def make_ddim_timesteps(
|
||||
# assert ddim_timesteps.shape[0] == num_ddim_timesteps
|
||||
# add one to get the final alpha values right (the ones from first scale to data during sampling)
|
||||
steps_out = ddim_timesteps + 1
|
||||
# steps_out = ddim_timesteps
|
||||
|
||||
if verbose:
|
||||
print(f'Selected timesteps for ddim sampler: {steps_out}')
|
||||
|
@ -1,3 +0,0 @@
|
||||
[tool.blue]
|
||||
line-length = 90
|
||||
target-version = ['py310']
|
@ -1,27 +0,0 @@
|
||||
albumentations==0.4.3
|
||||
einops==0.3.0
|
||||
diffusers==0.6.0
|
||||
huggingface-hub==0.8.1
|
||||
imageio==2.9.0
|
||||
imageio-ffmpeg==0.4.2
|
||||
kornia==0.6.0
|
||||
numpy==1.23.1
|
||||
--pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu
|
||||
omegaconf==2.1.1
|
||||
opencv-python==4.6.0.66
|
||||
pillow==9.2.0
|
||||
pudb==2019.2
|
||||
torch==1.12.1
|
||||
torchvision==0.13.0
|
||||
pytorch-lightning==1.7.7
|
||||
streamlit==1.12.0
|
||||
test-tube>=0.7.5
|
||||
torch-fidelity==0.3.0
|
||||
torchmetrics==0.6.0
|
||||
transformers==4.21.3
|
||||
-e git+https://github.com/openai/CLIP.git@main#egg=clip
|
||||
-e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers
|
||||
-e git+https://github.com/lstein/k-diffusion.git@master#egg=k-diffusion
|
||||
-e git+https://github.com/TencentARC/GFPGAN.git#egg=gfpgan
|
||||
-e git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg
|
||||
-e .
|
@ -415,7 +415,7 @@ def download_kornia():
|
||||
|
||||
#---------------------------------------------
|
||||
def download_clip():
|
||||
print('Loading CLIP model...',end='')
|
||||
print('Loading CLIP model (ignore deprecation errors)...',end='')
|
||||
sys.stdout.flush()
|
||||
version = 'openai/clip-vit-large-patch14'
|
||||
tokenizer = CLIPTokenizer.from_pretrained(version)
|
||||
@ -424,7 +424,7 @@ def download_clip():
|
||||
|
||||
#---------------------------------------------
|
||||
def download_gfpgan():
|
||||
print('Installing models from RealESRGAN and facexlib...',end='')
|
||||
print('Installing models from RealESRGAN and facexlib (ignore deprecation errors)...',end='')
|
||||
try:
|
||||
from realesrgan import RealESRGANer
|
||||
from realesrgan.archs.srvgg_arch import SRVGGNetCompact
|
||||
@ -442,19 +442,19 @@ def download_gfpgan():
|
||||
print('Error loading ESRGAN:')
|
||||
print(traceback.format_exc())
|
||||
|
||||
print('Loading models from GFPGAN')
|
||||
print('Loading models from GFPGAN...',end='')
|
||||
for model in (
|
||||
[
|
||||
'https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth',
|
||||
'src/gfpgan/experiments/pretrained_models/GFPGANv1.4.pth'
|
||||
'models/gfpgan/GFPGANv1.4.pth'
|
||||
],
|
||||
[
|
||||
'https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_Resnet50_Final.pth',
|
||||
'./gfpgan/weights/detection_Resnet50_Final.pth'
|
||||
'models/gfpgan/weights/detection_Resnet50_Final.pth'
|
||||
],
|
||||
[
|
||||
'https://github.com/xinntao/facexlib/releases/download/v0.2.2/parsing_parsenet.pth',
|
||||
'./gfpgan/weights/parsing_parsenet.pth'
|
||||
'models/gfpgan/weights/parsing_parsenet.pth'
|
||||
],
|
||||
):
|
||||
model_url,model_dest = model
|
||||
@ -489,22 +489,23 @@ def download_clipseg():
|
||||
import zipfile
|
||||
try:
|
||||
model_url = 'https://owncloud.gwdg.de/index.php/s/ioHbRzFx6th32hn/download'
|
||||
model_dest = 'src/clipseg/clipseg_weights.zip'
|
||||
weights_dir = 'src/clipseg/weights'
|
||||
if not os.path.exists(weights_dir):
|
||||
model_dest = 'models/clipseg/clipseg_weights'
|
||||
weights_zip = 'models/clipseg/weights.zip'
|
||||
|
||||
if not os.path.exists(model_dest):
|
||||
os.makedirs(os.path.dirname(model_dest), exist_ok=True)
|
||||
if not os.path.exists('src/clipseg/weights/rd64-uni-refined.pth'):
|
||||
request.urlretrieve(model_url,model_dest)
|
||||
with zipfile.ZipFile(model_dest,'r') as zip:
|
||||
zip.extractall('src/clipseg')
|
||||
os.rename('src/clipseg/clipseg_weights','src/clipseg/weights')
|
||||
os.remove(model_dest)
|
||||
from clipseg_models.clipseg import CLIPDensePredT
|
||||
if not os.path.exists(f'{model_dest}/rd64-uni-refined.pth'):
|
||||
request.urlretrieve(model_url,weights_zip)
|
||||
with zipfile.ZipFile(weights_zip,'r') as zip:
|
||||
zip.extractall('models/clipseg')
|
||||
os.remove(weights_zip)
|
||||
|
||||
from clipseg.clipseg import CLIPDensePredT
|
||||
model = CLIPDensePredT(version='ViT-B/16', reduce_dim=64, )
|
||||
model.eval()
|
||||
model.load_state_dict(
|
||||
torch.load(
|
||||
'src/clipseg/weights/rd64-uni-refined.pth',
|
||||
'models/clipseg/clipseg_weights/rd64-uni-refined.pth',
|
||||
map_location=torch.device('cpu')
|
||||
),
|
||||
strict=False,
|
||||
|