Merge branch 'development' into main

This commit is contained in:
Lincoln Stein 2022-09-16 17:44:15 -04:00 committed by GitHub
commit d81bc46218
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
121 changed files with 12126 additions and 1453 deletions

26
.github/workflows/mkdocs-flow.yml vendored Normal file
View File

@ -0,0 +1,26 @@
# Builds the MkDocs documentation and publishes the result to GitHub Pages.
name: Deploy
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
jobs:
  build:
    name: Deploy docs to GitHub Pages
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Build
        uses: Tiryoh/actions-mkdocs@v0
        with:
          mkdocs_version: 'latest' # option
          requirements: '/requirements-mkdocs.txt' # option
          configfile: '/mkdocs.yml' # option
      - name: Deploy
        # Pull requests should only verify that the docs build; publishing is
        # limited to pushes so unmerged changes never reach GitHub Pages.
        if: github.event_name == 'push'
        uses: peaceiris/actions-gh-pages@v3
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: ./site

12
.gitignore vendored
View File

@ -77,9 +77,6 @@ db.sqlite3-journal
instance/
.webassets-cache
# WebUI temp files:
img2img-tmp.png
# Scrapy stuff:
.scrapy
@ -186,3 +183,12 @@ testtube
checkpoints
# If it's a Mac
.DS_Store
# Let the frontend manage its own gitignore
!frontend/*
# Scratch folder
.scratch/
.vscode/
gfpgan/
models/ldm/stable-diffusion-v1/model.sha256

13
.prettierrc.yaml Normal file
View File

@ -0,0 +1,13 @@
endOfLine: lf
tabWidth: 2
useTabs: false
singleQuote: true
quoteProps: as-needed
embeddedLanguageFormatting: auto
overrides:
- files: "*.md"
options:
proseWrap: always
printWidth: 100
parser: markdown
cursorOffset: -1

164
README.md
View File

@ -1,7 +1,7 @@
<h1 align='center'><b>Stable Diffusion Dream Script</b></h1>
<p align='center'>
<img src="docs/assets/logo.png"/>
<img src="docs/assets/logo.png"/>
</p>
<p align="center">
@ -12,37 +12,39 @@
<img src="https://img.shields.io/github/issues-pr/lstein/stable-diffusion?logo=GitHub&style=for-the-badge" alt="pull-requests"/>
</p>
# **Stable Diffusion Dream Script**
This is a fork of
[CompVis/stable-diffusion](https://github.com/CompVis/stable-diffusion),
the open source text-to-image generator. It provides a streamlined
process with various new features and options to aid the image
generation process. It runs on Windows, Mac and Linux machines,
and runs on GPU cards with as little as 4 GB or RAM.
This is a fork of [CompVis/stable-diffusion](https://github.com/CompVis/stable-diffusion), the open
source text-to-image generator. It provides a streamlined process with various new features and
options to aid the image generation process. It runs on Windows, Mac and Linux machines, and runs on
GPU cards with as little as 4 GB of RAM.
_Note: This fork is rapidly evolving. Please use the
[Issues](https://github.com/lstein/stable-diffusion/issues) tab to
report bugs and make feature requests. Be sure to use the provided
templates. They will help aid diagnose issues faster._
[Issues](https://github.com/lstein/stable-diffusion/issues) tab to report bugs and make feature
requests. Be sure to use the provided templates. They will help diagnose issues faster._
**Table of Contents**
# **Table of Contents**
1. [Installation](#installation)
2. [Major Features](#features)
3. [Changelog](#latest-changes)
4. [Troubleshooting](#troubleshooting)
5. [Contributing](#contributing)
6. [Support](#support)
2. [Hardware Requirements](#hardware-requirements)
3. [Features](#features)
4. [Latest Changes](#latest-changes)
5. [Troubleshooting](#troubleshooting)
6. [Contributing](#contributing)
7. [Contributors](#contributors)
8. [Support](#support)
9. [Further Reading](#further-reading)
# Installation
## Installation
This fork is supported across multiple platforms. You can find individual installation instructions below.
This fork is supported across multiple platforms. You can find individual installation instructions
below.
- ## [Linux](docs/installation/INSTALL_LINUX.md)
- ## [Windows](docs/installation/INSTALL_WINDOWS.md)
- ## [Macintosh](docs/installation/INSTALL_MAC.md)
- ### [Linux](docs/installation/INSTALL_LINUX.md)
## **Hardware Requirements**
- ### [Windows](docs/installation/INSTALL_WINDOWS.md)
- ### [Macintosh](docs/installation/INSTALL_MAC.md)
## Hardware Requirements
**System**
@ -61,109 +63,117 @@ You wil need one of the following:
**Note**
If you are have a Nvidia 10xx series card (e.g. the 1080ti), please
run the dream script in full-precision mode as shown below.
If you have an Nvidia 10xx series card (e.g. the 1080ti), please run the dream script in
full-precision mode as shown below.
Similarly, specify full-precision mode on Apple M1 hardware.
To run in full-precision mode, start `dream.py` with the
`--full_precision` flag:
To run in full-precision mode, start `dream.py` with the `--full_precision` flag:
```
```bash
(ldm) ~/stable-diffusion$ python scripts/dream.py --full_precision
```
# Features
## Features
## **Major Features**
### Major Features
- ## [Interactive Command Line Interface](docs/features/CLI.md)
- #### [Interactive Command Line Interface](docs/features/CLI.md)
- ## [Image To Image](docs/features/IMG2IMG.md)
- #### [Image To Image](docs/features/IMG2IMG.md)
- ## [Inpainting Support](docs/features/INPAINTING.md)
- #### [Inpainting Support](docs/features/INPAINTING.md)
- ## [GFPGAN and Real-ESRGAN Support](docs/features/UPSCALE.md)
- #### [GFPGAN and Real-ESRGAN Support](docs/features/UPSCALE.md)
- ## [Embiggen upscaling](docs/features/EMBIGGEN.md)
- #### [Seamless Tiling](docs/features/OTHER.md#seamless-tiling)
- ## [Seamless Tiling](docs/features/OTHER.md#seamless-tiling)
- #### [Google Colab](docs/features/OTHER.md#google-colab)
- ## [Google Colab](docs/features/OTHER.md#google-colab)
- #### [Web Server](docs/features/WEB.md)
- ## [Web Server](docs/features/WEB.md)
- #### [Reading Prompts From File](docs/features/OTHER.md#reading-prompts-from-a-file)
- ## [Reading Prompts From File](docs/features/OTHER.md#reading-prompts-from-a-file)
- #### [Shortcut: Reusing Seeds](docs/features/OTHER.md#shortcuts-reusing-seeds)
- ## [Shortcut: Reusing Seeds](docs/features/OTHER.md#shortcuts-reusing-seeds)
- #### [Weighted Prompts](docs/features/OTHER.md#weighted-prompts)
- ## [Weighted Prompts](docs/features/OTHER.md#weighted-prompts)
- #### [Variations](docs/features/VARIATIONS.md)
- ## [Variations](docs/features/VARIATIONS.md)
- #### [Personalizing Text-to-Image Generation](docs/features/TEXTUAL_INVERSION.md)
- ## [Personalizing Text-to-Image Generation](docs/features/TEXTUAL_INVERSION.md)
- #### [Simplified API for text to image generation](docs/features/OTHER.md#simplified-api)
- ## [Simplified API for text to image generation](docs/features/OTHER.md#simplified-api)
### Other Features
## **Other Features**
- #### [Creating Transparent Regions for Inpainting](docs/features/INPAINTING.md#creating-transparent-regions-for-inpainting)
- ### [Creating Transparent Regions for Inpainting](docs/features/INPAINTING.md#creating-transparent-regions-for-inpainting)
- #### [Preload Models](docs/features/OTHER.md#preload-models)
- ### [Preload Models](docs/features/OTHER.md#preload-models)
# Latest Changes
## Latest Changes
- v1.14 (11 September 2022)
- Memory optimizations for small-RAM cards. 512x512 now possible on 4 GB GPUs.
- Full support for Apple hardware with M1 or M2 chips.
- Add "seamless mode" for circular tiling of image. Generates beautiful effects. ([prixt](https://github.com/prixt)).
- Add "seamless mode" for circular tiling of image. Generates beautiful effects.
([prixt](https://github.com/prixt)).
- Inpainting support.
- Improved web server GUI.
- Lots of code and documentation cleanups.
- v1.13 (3 September 2022)
- Support image variations (see [VARIATIONS](docs/features/VARIATIONS.md) ([Kevin Gibbons](https://github.com/bakkot) and many contributors and reviewers)
- Supports a Google Colab notebook for a standalone server running on Google hardware [Arturo Mendivil](https://github.com/artmen1516)
- WebUI supports GFPGAN/ESRGAN facial reconstruction and upscaling [Kevin Gibbons](https://github.com/bakkot)
- WebUI supports incremental display of in-progress images during generation [Kevin Gibbons](https://github.com/bakkot)
- A new configuration file scheme that allows new models (including upcoming stable-diffusion-v1.5)
to be added without altering the code. ([David Wager](https://github.com/maddavid12))
- Support image variations (see [VARIATIONS](docs/features/VARIATIONS.md))
([Kevin Gibbons](https://github.com/bakkot) and many contributors and reviewers)
- Supports a Google Colab notebook for a standalone server running on Google hardware
[Arturo Mendivil](https://github.com/artmen1516)
- WebUI supports GFPGAN/ESRGAN facial reconstruction and upscaling
[Kevin Gibbons](https://github.com/bakkot)
- WebUI supports incremental display of in-progress images during generation
[Kevin Gibbons](https://github.com/bakkot)
- A new configuration file scheme that allows new models (including upcoming
stable-diffusion-v1.5) to be added without altering the code.
([David Wager](https://github.com/maddavid12))
- Can specify --grid on dream.py command line as the default.
- Miscellaneous internal bug and stability fixes.
- Works on M1 Apple hardware.
- Multiple bug fixes.
For older changelogs, please visit **[CHANGELOGS](docs/CHANGELOG.md)**.
For older changelogs, please visit the **[CHANGELOG](docs/features/CHANGELOG.md)**.
# Troubleshooting
## Troubleshooting
Please check out our **[Q&A](docs/help/TROUBLESHOOT.md)** to get solutions for common installation problems and other issues.
Please check out our **[Q&A](docs/help/TROUBLESHOOT.md)** to get solutions for common installation
problems and other issues.
# Contributing
## Contributing
Anyone who wishes to contribute to this project, whether documentation, features, bug fixes, code cleanup, testing, or code reviews, is very much encouraged to do so. If you are unfamiliar with
how to contribute to GitHub projects, here is a [Getting Started Guide](https://opensource.com/article/19/7/create-pull-request-github).
Anyone who wishes to contribute to this project, whether documentation, features, bug fixes, code
cleanup, testing, or code reviews, is very much encouraged to do so. If you are unfamiliar with how
to contribute to GitHub projects, here is a
[Getting Started Guide](https://opensource.com/article/19/7/create-pull-request-github).
A full set of contribution guidelines, along with templates, are in progress, but for now the most important thing is to **make your pull request against the "development" branch**, and not against "main". This will help keep public breakage to a minimum and will allow you to propose more radical changes.
A full set of contribution guidelines, along with templates, are in progress, but for now the most
important thing is to **make your pull request against the "development" branch**, and not against
"main". This will help keep public breakage to a minimum and will allow you to propose more radical
changes.
## **Contributors**
## Contributors
This fork is a combined effort of various people from across the
world. [Check out the list of all these amazing
people](docs/CONTRIBUTORS.md). We thank them for their time, hard work
and effort.
This fork is a combined effort of various people from across the world.
[Check out the list of all these amazing people](docs/other/CONTRIBUTORS.md). We thank them for
their time, hard work and effort.
# Support
## Support
For support,
please use this repository's GitHub Issues tracking service. Feel free
to send me an email if you use and like the script.
For support, please use this repository's GitHub Issues tracking service. Feel free to send me an
email if you use and like the script.
Original portions of the software are Copyright (c) 2020 Lincoln D. Stein (https://github.com/lstein)
Original portions of the software are Copyright (c) 2020
[Lincoln D. Stein](https://github.com/lstein)
# Further Reading
## Further Reading
Please see the original README for more information on this software
and underlying algorithm, located in the file [README-CompViz.md](docs/README-CompViz.md).
Please see the original README for more information on this software and underlying algorithm,
located in the file [README-CompViz.md](docs/other/README-CompViz.md).

View File

@ -0,0 +1,206 @@
from modules.parse_seed_weights import parse_seed_weights
import argparse
# Sampler names accepted by the -A/-m/--sampler switch of create_cmd_parser();
# the same list is interpolated into that switch's help text.
SAMPLER_CHOICES = [
    'ddim',
    'k_dpm_2_a',
    'k_dpm_2',
    'k_euler_a',
    'k_euler',
    'k_heun',
    'k_lms',
    'plms',
]
def parameters_to_command(params):
    """
    Converts dict of parameters into a `dream.py` REPL command.

    Only keys present in `params` produce a switch, and switches are emitted
    in a fixed order (prompt first, `-V` last) joined by single spaces.

    Optional values that are present but unset (`None`, empty string, empty
    list) are skipped instead of raising or producing a dangling switch:
    this applies to `init_img`, `init_mask`, `variation_amount` and
    `with_variations`.

    Args:
        params: dict of generation / postprocessing parameters.
            `upscale` is indexed as `[level, strength]`; `with_variations`
            is iterated as `(seed, weight)` pairs.

    Returns:
        The command as a single string ('' when nothing applies).
    """

    switches = list()

    if 'prompt' in params:
        switches.append(f'"{params["prompt"]}"')
    if 'steps' in params:
        switches.append(f'-s {params["steps"]}')
    if 'seed' in params:
        switches.append(f'-S {params["seed"]}')
    if 'width' in params:
        switches.append(f'-W {params["width"]}')
    if 'height' in params:
        switches.append(f'-H {params["height"]}')
    if 'cfg_scale' in params:
        switches.append(f'-C {params["cfg_scale"]}')
    if 'sampler_name' in params:
        switches.append(f'-A {params["sampler_name"]}')
    if 'seamless' in params and params["seamless"] == True:
        switches.append('--seamless')
    # Truthiness instead of len(): a None value must not raise TypeError.
    if params.get('init_img'):
        switches.append(f'-I {params["init_img"]}')
    if params.get('init_mask'):
        switches.append(f'-M {params["init_mask"]}')
    if 'strength' in params and 'init_img' in params:
        switches.append(f'-f {params["strength"]}')
    if 'fit' in params and params["fit"] == True:
        switches.append('--fit')
    if 'gfpgan_strength' in params and params["gfpgan_strength"]:
        switches.append(f'-G {params["gfpgan_strength"]}')
    if 'upscale' in params and params["upscale"]:
        switches.append(f'-U {params["upscale"][0]} {params["upscale"][1]}')
    # A None amount is treated like 0 rather than being compared with `>`.
    if (params.get('variation_amount') or 0) > 0:
        switches.append(f'-v {params["variation_amount"]}')
    # Skip None / empty lists: iterating None raises, and an empty list would
    # emit a bare `-V ` with no argument.
    if params.get('with_variations'):
        seed_weight_pairs = ','.join(
            f'{seed}:{weight}' for seed, weight in params["with_variations"]
        )
        switches.append(f'-V {seed_weight_pairs}')

    return ' '.join(switches)
def create_cmd_parser():
    """
    Build the argument parser used to read `dream.py`-style commands.

    This is a copy of the parser from `dream.py`, changed so that the
    positional prompt has a default value. It is a temporary hack pending
    merge of #587, which provides a better way to do this.
    """
    parser = argparse.ArgumentParser(
        description='Example: dream> a fantastic alien landscape -W1024 -H960 -s100 -n12',
        exit_on_error=True,
    )
    add = parser.add_argument
    supported_samplers = ", ".join(SAMPLER_CHOICES)

    # Prompt and core sampling settings
    add('prompt', nargs='?', default='')
    add('-s', '--steps', type=int, help='Number of steps')
    add(
        '-S', '--seed',
        type=int,
        help='Image seed; a +ve integer, or use -1 for the previous seed, -2 for the one before that, etc',
    )
    add(
        '-n', '--iterations',
        type=int,
        default=1,
        help='Number of samplings to perform (slower, but will provide seeds for individual images)',
    )
    add('-W', '--width', type=int, help='Image width, multiple of 64')
    add('-H', '--height', type=int, help='Image height, multiple of 64')
    add(
        '-C', '--cfg_scale',
        default=7.5,
        type=float,
        help='Classifier free guidance (CFG) scale - higher numbers cause generator to "try" harder.',
    )

    # Output handling
    add('-g', '--grid', action='store_true', help='generate a grid')
    add(
        '--outdir', '-o',
        type=str,
        default=None,
        help='Directory to save generated images and a log of prompts and seeds',
    )
    add(
        '--seamless',
        action='store_true',
        help='Change the model to seamless tiling (circular) mode',
    )
    add(
        '-i', '--individual',
        action='store_true',
        help='Generate individual files (default)',
    )

    # img2img / inpainting inputs
    add(
        '-I', '--init_img',
        type=str,
        help='Path to input image for img2img mode (supersedes width and height)',
    )
    add(
        '-M', '--init_mask',
        type=str,
        help='Path to input mask for inpainting mode (supersedes width and height)',
    )
    add(
        '-T', '-fit', '--fit',
        action='store_true',
        help='If specified, will resize the input image to fit within the dimensions of width x height (512x512 default)',
    )
    add(
        '-f', '--strength',
        default=0.75,
        type=float,
        help='Strength for noising/unnoising. 0.0 preserves image exactly, 1.0 replaces it completely',
    )

    # Postprocessing
    add(
        '-G', '--gfpgan_strength',
        default=0,
        type=float,
        help='The strength at which to apply the GFPGAN model to the result, in order to improve faces.',
    )
    add(
        '-U', '--upscale',
        nargs='+',
        default=None,
        type=float,
        help='Scale factor (2, 4) for upscaling followed by upscaling strength (0-1.0). If strength not specified, defaults to 0.75',
    )
    add(
        '-save_orig', '--save_original',
        action='store_true',
        help='Save original. Use it when upscaling to save both versions.',
    )
    # variants is going to be superseded by a generalized "prompt-morph" function
    # parser.add_argument('-v','--variants',type=int,help="in img2img mode, the first generated image will get passed back to img2img to generate the requested number of variants")
    add(
        '-x', '--skip_normalize',
        action='store_true',
        help='Skip subprompt weight normalization',
    )

    # Sampler selection and diagnostics
    add(
        '-A', '-m', '--sampler',
        dest='sampler_name',
        default=None,
        type=str,
        choices=SAMPLER_CHOICES,
        metavar='SAMPLER_NAME',
        help=f'Switch to a different sampler. Supported samplers: {supported_samplers}',
    )
    add(
        '-t', '--log_tokenization',
        action='store_true',
        help='shows how the prompt is split into tokens',
    )

    # Variations
    add(
        '-v', '--variation_amount',
        default=0.0,
        type=float,
        help='If > 0, generates variations on the initial seed instead of random seeds per iteration. Must be between 0 and 1. Higher values will be more different.',
    )
    add(
        '-V', '--with_variations',
        default=None,
        type=str,
        help='list of variations to apply, in the format `seed:weight,seed:weight,...',
    )

    return parser

View File

@ -0,0 +1,47 @@
def parse_seed_weights(seed_weights):
    """
    Accepts seed weights as string in "12345:0.1,23456:0.2,3456:0.3" format
    Validates them
    If valid: returns as [[12345, 0.1], [23456, 0.2], [3456, 0.3]]
    If invalid: returns False

    A pair is valid when it has exactly one ':' separating a non-empty
    integer seed >= 0 from a non-empty float weight in the range [0, 1].
    """

    # Must be a non-empty string
    if not isinstance(seed_weights, str) or len(seed_weights) == 0:
        return False

    pairs = []

    for pair in seed_weights.split(","):
        split_values = pair.split(":")

        # Seed and weight are required
        if len(split_values) != 2:
            return False

        seed_text, weight_text = split_values

        # Both halves must be non-empty. (The weight used to be compared
        # against length 1, which rejected valid weights such as "1" or "0".)
        if len(seed_text) == 0 or len(weight_text) == 0:
            return False

        # Try casting the seed to int and weight to float
        try:
            seed = int(seed_text)
            weight = float(weight_text)
        except ValueError:
            return False

        # Seed must be 0 or above
        if not seed >= 0:
            return False

        # Weight must be between 0 and 1 (this also rejects NaN)
        if not 0 <= weight <= 1:
            return False

        # This pair is valid
        pairs.append([seed, weight])

    # All pairs are valid
    return pairs

397
backend/server.py Normal file
View File

@ -0,0 +1,397 @@
import mimetypes
import transformers
import json
import os
import traceback
import eventlet
import glob
import shlex
import argparse
from flask_socketio import SocketIO
from flask import Flask, send_from_directory, url_for, jsonify
from pathlib import Path
from PIL import Image
from pytorch_lightning import logging
from threading import Event
from uuid import uuid4
from ldm.gfpgan.gfpgan_tools import real_esrgan_upscale
from ldm.gfpgan.gfpgan_tools import run_gfpgan
from ldm.generate import Generate
from ldm.dream.pngwriter import PngWriter, retrieve_metadata
from modules.parameters import parameters_to_command, create_cmd_parser
"""
USER CONFIG
"""
# Settings read by the server setup code further down in this module.
output_dir = "outputs/" # Base output directory for images
#host = 'localhost' # Web & socket.io host
# NOTE(review): '0.0.0.0' presumably exposes the server on every network
# interface; the handlers below accept file paths and uploads from clients,
# so confirm this is intended before running on an untrusted network.
host = '0.0.0.0' # Web & socket.io host
port = 9090 # Web & socket.io port
verbose = False # enables copious socket.io logging
additional_allowed_origins = ['http://localhost:9090'] # additional CORS allowed origins
"""
END USER CONFIG
"""
"""
SERVER SETUP
"""
# fix missing mimetypes on windows due to registry wonkiness
mimetypes.add_type('application/javascript', '.js')
mimetypes.add_type('text/css', '.css')
# The built frontend is served as static files from the site root
# (static_url_path is empty).
app = Flask(__name__, static_url_path='', static_folder='../frontend/dist/')
# Folder used by the /outputs/<path:filename> route.
# NOTE(review): relative path - presumably resolved against the Flask app
# root rather than the working directory; confirm against Flask's docs.
app.config['OUTPUTS_FOLDER'] = "../outputs"
@app.route('/outputs/<path:filename>')
def outputs(filename):
    """Serve `filename` from the folder configured as OUTPUTS_FOLDER."""
    outputs_folder = app.config['OUTPUTS_FOLDER']
    return send_from_directory(outputs_folder, filename)
@app.route("/", defaults={'path': ''})
def serve(path):
    """Serve the frontend's index.html; `path` is accepted but not used."""
    index_page = 'index.html'
    return send_from_directory(app.static_folder, index_page)
# Both socket.io and engine.io logging follow the `verbose` switch.
logger = bool(verbose)
engineio_logger = bool(verbose)

# default 1,000,000, needs to be higher for socketio to accept larger images
max_http_buffer_size = 10000000

# The server's own origin is always allowed, followed by the configured extras.
cors_allowed_origins = [f"http://{host}:{port}", *additional_allowed_origins]

socketio = SocketIO(
    app,
    cors_allowed_origins=cors_allowed_origins,
    max_http_buffer_size=max_http_buffer_size,
    engineio_logger=engineio_logger,
    logger=logger,
)
"""
END SERVER SETUP
"""
"""
APP SETUP
"""
class CanceledException(Exception):
    """Raised from the step callback to abort a generation after a cancel request."""


# Set by the 'cancel' socket event and cleared when a new generation starts.
canceled = Event()
# Only errors are reported by transformers and pytorch_lightning
transformers.logging.set_verbosity_error()
logging.getLogger('pytorch_lightning').setLevel(logging.ERROR)

# Create the generator and load its model up front
model = Generate()
model.load_model()

# "finished" images go here
result_path = os.path.join(output_dir, 'img-samples/')

# intermediates are kept in a temporary sub-folder
intermediate_path = os.path.join(result_path, 'intermediates/')

# user-uploaded init images and masks
init_path = os.path.join(result_path, 'init-images/')
mask_path = os.path.join(result_path, 'mask-images/')

# plain-text log of commands
log_path = os.path.join(result_path, 'dream_log.txt')

# make sure every output directory exists
for _required_dir in (result_path, intermediate_path, init_path, mask_path):
    os.makedirs(_required_dir, exist_ok=True)
"""
END APP SETUP
"""
"""
SOCKET.IO LISTENERS
"""
@socketio.on('requestAllImages')
def handle_request_all_images():
    """
    Respond with every PNG directly inside `result_path`, oldest first.

    Each entry is `{'path': ..., 'metadata': ...}`. The metadata is the
    image's 'sd-metadata' value; when that is empty and a 'Dream' command is
    present, the command is parsed with the command parser instead.
    """
    print('>> All images requested')

    parser = create_cmd_parser()
    candidates = glob.glob(result_path + "*.png")
    paths = sorted(
        (candidate for candidate in candidates if os.path.isfile(candidate)),
        key=os.path.getmtime,
    )

    image_array = []
    for path in paths:
        all_metadata = retrieve_metadata(path)
        only_dream_command = 'Dream' in all_metadata and not all_metadata['sd-metadata']
        if only_dream_command:
            dream_args = shlex.split(all_metadata['Dream'])
            metadata = vars(parser.parse_args(dream_args))
        else:
            metadata = all_metadata['sd-metadata']
        image_array.append({'path': path, 'metadata': metadata})

    return make_response("OK", data=image_array)
@socketio.on('generateImage')
def handle_generate_image_event(generation_parameters, esrgan_parameters, gfpgan_parameters):
    """Log the request, run `generate_images` with the three parameter sets, reply "OK"."""
    request_summary = f'>> Image generation requested: {generation_parameters}\nESRGAN parameters: {esrgan_parameters}\nGFPGAN parameters: {gfpgan_parameters}'
    print(request_summary)
    generate_images(generation_parameters, esrgan_parameters, gfpgan_parameters)
    return make_response("OK")
@socketio.on('runESRGAN')
def handle_run_esrgan_event(original_image, esrgan_parameters):
    """
    Upscale an existing image with ESRGAN, save it and emit a 'result' event.

    `original_image` supplies 'url', 'uuid' and 'metadata'; the seed is taken
    from the metadata ('unknown_seed' when absent) and stored back into
    `esrgan_parameters` before saving.
    """
    source_url = original_image["url"]
    print(f'>> ESRGAN upscale requested for "{source_url}": {esrgan_parameters}')

    image = Image.open(source_url)
    seed = original_image['metadata'].get('seed', 'unknown_seed')

    image = real_esrgan_upscale(
        image=image,
        upsampler_scale=esrgan_parameters['upscale'][0],
        strength=esrgan_parameters['upscale'][1],
        seed=seed,
    )

    esrgan_parameters['seed'] = seed
    path = save_image(image, esrgan_parameters, result_path, postprocessing='esrgan')
    command = parameters_to_command(esrgan_parameters)
    write_log_message(f'[Upscaled] "{source_url}" > "{path}": {command}')

    result = {
        'url': os.path.relpath(path),
        'type': 'esrgan',
        'uuid': original_image['uuid'],
        'metadata': esrgan_parameters,
    }
    socketio.emit('result', result)
@socketio.on('runGFPGAN')
def handle_run_gfpgan_event(original_image, gfpgan_parameters):
    """
    Run GFPGAN on an existing image, save it and emit a 'result' event.

    `original_image` supplies 'url', 'uuid' and 'metadata'; the seed is taken
    from the metadata ('unknown_seed' when absent) and stored back into
    `gfpgan_parameters` before saving.
    """
    source_url = original_image["url"]
    print(f'>> GFPGAN face fix requested for "{source_url}": {gfpgan_parameters}')

    image = Image.open(source_url)
    seed = original_image['metadata'].get('seed', 'unknown_seed')

    image = run_gfpgan(
        image=image,
        strength=gfpgan_parameters['gfpgan_strength'],
        seed=seed,
        upsampler_scale=1,
    )

    gfpgan_parameters['seed'] = seed
    path = save_image(image, gfpgan_parameters, result_path, postprocessing='gfpgan')
    command = parameters_to_command(gfpgan_parameters)
    write_log_message(f'[Fixed faces] "{source_url}" > "{path}": {command}')

    result = {
        'url': os.path.relpath(path),
        'type': 'gfpgan',
        'uuid': original_image['uuid'],
        'metadata': gfpgan_parameters,
    }
    socketio.emit('result', result)
@socketio.on('cancel')
def handle_cancel():
    """Set the `canceled` event so the running generation aborts; reply "OK"."""
    print('>> Cancel processing requested')
    canceled.set()
    return make_response("OK")
# Safety mechanism: only files located inside `result_path` may be deleted.
@socketio.on('deleteImage')
def handle_delete_image(path):
    """
    Delete an image file on behalf of a client.

    `path` comes straight from the client, so it is resolved and checked
    against the results directory first; anything that resolves outside of
    it (absolute paths, '..' segments, symlinks) is refused.

    Returns:
        An "OK" response after deleting the file, or an "ERROR" response
        with a message when the path is refused.
    """
    print(f'>> Delete requested "{path}"')

    results_root = Path(result_path).resolve()
    target = Path(path).resolve()

    # `parents` excludes the path itself, so the results directory itself
    # can never be the deletion target either.
    if results_root not in target.parents:
        print(f'>> Delete refused, "{target}" is outside of "{results_root}"')
        return make_response("ERROR", message='Path is outside of the results directory')

    target.unlink()
    return make_response("OK")
# Safety mechanism: the client-supplied name is reduced to its final path
# component so uploads always land inside `init_path`.
@socketio.on('uploadInitialImage')
def handle_upload_initial_image(bytes, name):
    """
    Store an uploaded init image and reply with the path it was saved to.

    Args:
        bytes: raw file content sent by the client.
        name: original file name; a uuid is inserted before its extension
            so repeated uploads of the same name do not overwrite each other.
    """
    print(f'>> Init image upload requested "{name}"')
    uuid = uuid4().hex

    # Drop any directory part (either separator style) so names such as
    # '../../x.png' or '/etc/x.png' cannot escape the upload directory.
    safe_name = os.path.basename(name.replace('\\', '/'))
    stem, extension = os.path.splitext(safe_name)
    file_path = os.path.join(init_path, f'{stem}.{uuid}{extension}')

    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    # Context manager so the handle is flushed and closed before replying.
    with open(file_path, "wb") as new_file:
        new_file.write(bytes)

    return make_response("OK", data=file_path)
# Safety mechanism: the client-supplied name is reduced to its final path
# component so uploads always land inside `mask_path`.
@socketio.on('uploadMaskImage')
def handle_upload_mask_image(bytes, name):
    """
    Store an uploaded mask image and reply with the path it was saved to.

    Args:
        bytes: raw file content sent by the client.
        name: original file name; a uuid is inserted before its extension
            so repeated uploads of the same name do not overwrite each other.
    """
    print(f'>> Mask image upload requested "{name}"')
    uuid = uuid4().hex

    # Drop any directory part (either separator style) so names such as
    # '../../x.png' or '/etc/x.png' cannot escape the upload directory.
    safe_name = os.path.basename(name.replace('\\', '/'))
    stem, extension = os.path.splitext(safe_name)
    file_path = os.path.join(mask_path, f'{stem}.{uuid}{extension}')

    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    # Context manager so the handle is flushed and closed before replying.
    with open(file_path, "wb") as new_file:
        new_file.write(bytes)

    return make_response("OK", data=file_path)
"""
END SOCKET.IO LISTENERS
"""
"""
ADDITIONAL FUNCTIONS
"""
def write_log_message(message, log_path=log_path):
    """Append `message` plus a trailing newline to the UTF-8 log file at `log_path`."""
    with open(log_path, 'a', encoding='utf-8') as log_file:
        log_file.write(f'{message}\n')
def make_response(status, message=None, data=None):
    """
    Build the dict returned to socket.io clients.

    'status' is always present; 'message' and 'data' are included only when
    the corresponding argument is not None.
    """
    optional_fields = {'message': message, 'data': data}
    response = {'status': status}
    response.update(
        {key: value for key, value in optional_fields.items() if value is not None}
    )
    return response
def save_image(image, parameters, output_dir, step_index=None, postprocessing=False):
    """
    Write `image` as a PNG into `output_dir` and return the path from the writer.

    The file name is `<unique prefix>.<seed>[.<step_index>][.postprocessed].png`,
    where the seed comes from `parameters` ('unknown_seed' when absent) and
    the optional parts are added only for truthy arguments. The command
    derived from `parameters` and the parameters themselves are handed to
    the PNG writer along with the image.
    """
    seed = parameters.get('seed', 'unknown_seed')

    pngwriter = PngWriter(output_dir)
    prefix = pngwriter.unique_prefix()

    name_parts = [f'{prefix}.{seed}']
    if step_index:
        name_parts.append(f'{step_index}')
    if postprocessing:
        name_parts.append('postprocessed')
    name_parts.append('png')
    filename = '.'.join(name_parts)

    command = parameters_to_command(parameters)

    return pngwriter.save_image_and_prompt_to_png(
        image, command, metadata=parameters, name=filename
    )
def generate_images(generation_parameters, esrgan_parameters, gfpgan_parameters):
    """
    Run one generation request and stream progress/results over socket.io.

    `generation_parameters` is unpacked as keyword arguments into
    `model.prompt2image`, with `image_progress` as the step callback and
    `image_done` as the image callback. `esrgan_parameters` and
    `gfpgan_parameters` enable the matching postprocessing step when truthy.

    A CanceledException raised from the step callback ends the run silently;
    KeyboardInterrupt is re-raised; any other exception is sent to clients as
    an 'error' event and its traceback is printed.
    """
    # A new run starts un-canceled.
    canceled.clear()

    # Counter used to number the intermediate image files.
    step_index = 1

    def image_progress(sample, step):
        # Abort the run as soon as a cancel request has been seen.
        if canceled.is_set():
            raise CanceledException
        nonlocal step_index
        nonlocal generation_parameters
        # Intermediate images: only when requested, every 5th step, and not
        # for the last steps of the run.
        if generation_parameters["progress_images"] and step % 5 == 0 and step < generation_parameters['steps'] - 1:
            image = model.sample_to_image(sample)
            path = save_image(image, generation_parameters, intermediate_path, step_index)

            step_index += 1
            socketio.emit('intermediateResult', {
                'url': os.path.relpath(path), 'metadata': generation_parameters})
        socketio.emit('progress', {'step': step + 1})
        # NOTE(review): presumably yields to eventlet so the emits above are
        # sent while generation continues - confirm.
        eventlet.sleep(0)

    def image_done(image, seed):
        nonlocal generation_parameters
        nonlocal esrgan_parameters
        nonlocal gfpgan_parameters

        # NOTE(review): this is an alias, not a copy - the keys written below
        # ('upscale', 'gfpgan_strength', 'seed') end up in
        # generation_parameters itself.
        all_parameters = generation_parameters
        postprocessing = False

        if esrgan_parameters:
            image = real_esrgan_upscale(
                image=image,
                strength=esrgan_parameters['strength'],
                upsampler_scale=esrgan_parameters['level'],
                seed=seed
            )
            postprocessing = True
            all_parameters["upscale"] = [esrgan_parameters['level'], esrgan_parameters['strength']]

        if gfpgan_parameters:
            image = run_gfpgan(
                image=image,
                strength=gfpgan_parameters['strength'],
                seed=seed,
                upsampler_scale=1,
            )
            postprocessing = True
            all_parameters["gfpgan_strength"] = gfpgan_parameters['strength']

        # Record the seed passed to this callback before saving.
        all_parameters['seed'] = seed

        path = save_image(image, all_parameters, result_path, postprocessing=postprocessing)
        command = parameters_to_command(all_parameters)

        print(f'Image generated: "{path}"')
        write_log_message(f'[Generated] "{path}": {command}')

        socketio.emit(
            'result', {'url': os.path.relpath(path), 'type': 'generation', 'metadata': all_parameters})
        eventlet.sleep(0)

    try:
        model.prompt2image(
            **generation_parameters,
            step_callback=image_progress,
            image_callback=image_done
        )

    except KeyboardInterrupt:
        raise
    except CanceledException:
        # Cancellation is an expected way to stop; nothing to report.
        pass
    except Exception as e:
        # Report the failure to clients and print the traceback on the server.
        socketio.emit('error', (str(e)))
        print("\n")
        traceback.print_exc()
        print("\n")
"""
END ADDITIONAL FUNCTIONS
"""
# Start the web / socket.io server only when this file is run as a script.
if __name__ == '__main__':
    print(f'Starting server at http://{host}:{port}')
    socketio.run(app, host=host, port=port)

View File

@ -1,137 +0,0 @@
# **Changelog**
## v1.13 (in process)
- Supports a Google Colab notebook for a standalone server running on Google hardware [Arturo Mendivil](https://github.com/artmen1516)
- WebUI supports GFPGAN/ESRGAN facial reconstruction and upscaling [Kevin Gibbons](https://github.com/bakkot)
- WebUI supports incremental display of in-progress images during generation [Kevin Gibbons](https://github.com/bakkot)
- Output directory can be specified on the dream> command line.
- The grid was displaying duplicated images when not enough images to fill the final row [Muhammad Usama](https://github.com/SMUsamaShah)
- Can specify --grid on dream.py command line as the default.
- Miscellaneous internal bug and stability fixes.
---
## v1.12 (28 August 2022)
- Improved file handling, including ability to read prompts from standard input.
(kudos to [Yunsaki](https://github.com/yunsaki)
- The web server is now integrated with the dream.py script. Invoke by adding --web to
the dream.py command arguments.
- Face restoration and upscaling via GFPGAN and Real-ESGAN are now automatically
enabled if the GFPGAN directory is located as a sibling to Stable Diffusion.
VRAM requirements are modestly reduced. Thanks to both [Blessedcoolant](https://github.com/blessedcoolant) and
[Oceanswave](https://github.com/oceanswave) for their work on this.
- You can now swap samplers on the dream> command line. [Blessedcoolant](https://github.com/blessedcoolant)
---
## v1.11 (26 August 2022)
- NEW FEATURE: Support upscaling and face enhancement using the GFPGAN module. (kudos to [Oceanswave](https://github.com/Oceanswave)
- You now can specify a seed of -1 to use the previous image's seed, -2 to use the seed for the image generated before that, etc.
Seed memory only extends back to the previous command, but will work on all images generated with the -n# switch.
- Variant generation support temporarily disabled pending more general solution.
- Created a feature branch named **yunsaki-morphing-dream** which adds experimental support for
iteratively modifying the prompt and its parameters. Please see[ Pull Request #86](https://github.com/lstein/stable-diffusion/pull/86)
for a synopsis of how this works. Note that when this feature is eventually added to the main branch, it will may be modified
significantly.
---
## v1.10 (25 August 2022)
- A barebones but fully functional interactive web server for online generation of txt2img and img2img.
---
## v1.09 (24 August 2022)
- A new -v option allows you to generate multiple variants of an initial image
in img2img mode. (kudos to [Oceanswave](https://github.com/Oceanswave). [
See this discussion in the PR for examples and details on use](https://github.com/lstein/stable-diffusion/pull/71#issuecomment-1226700810))
- Added ability to personalize text to image generation (kudos to [Oceanswave](https://github.com/Oceanswave) and [nicolai256](https://github.com/nicolai256))
- Enabled all of the samplers from k_diffusion
---
## v1.08 (24 August 2022)
- Escape single quotes on the dream> command before trying to parse. This avoids
parse errors.
- Removed instruction to get Python3.8 as first step in Windows install.
Anaconda3 does it for you.
- Added bounds checks for numeric arguments that could cause crashes.
- Cleaned up the copyright and license agreement files.
---
## v1.07 (23 August 2022)
- Image filenames will now never fill gaps in the sequence, but will be assigned the
next higher name in the chosen directory. This ensures that the alphabetic and chronological
sort orders are the same.
---
## v1.06 (23 August 2022)
- Added weighted prompt support contributed by [xraxra](https://github.com/xraxra)
- Example of using weighted prompts to tweak a demonic figure contributed by [bmaltais](https://github.com/bmaltais)
---
## v1.05 (22 August 2022 - after the drop)
- Filenames now use the following formats:
000010.95183149.png -- Two files produced by the same command (e.g. -n2),
000010.26742632.png -- distinguished by a different seed.
000011.455191342.01.png -- Two files produced by the same command using
000011.455191342.02.png -- a batch size>1 (e.g. -b2). They have the same seed.
000011.4160627868.grid#1-4.png -- a grid of four images (-g); the whole grid can
be regenerated with the indicated key
- It should no longer be possible for one image to overwrite another
- You can use the "cd" and "pwd" commands at the dream> prompt to set and retrieve
the path of the output directory.
---
## v1.04 (22 August 2022 - after the drop)
- Updated README to reflect installation of the released weights.
- Suppressed very noisy and inconsequential warning when loading the frozen CLIP
tokenizer.
---
## v1.03 (22 August 2022)
- The original txt2img and img2img scripts from the CompViz repository have been moved into
a subfolder named "orig_scripts", to reduce confusion.
---
## v1.02 (21 August 2022)
- A copy of the prompt and all of its switches and options is now stored in the corresponding
image in a tEXt metadata field named "Dream". You can read the prompt using scripts/images2prompt.py,
or an image editor that allows you to explore the full metadata.
**Please run "conda env update -f environment.yaml" to load the k_lms dependencies!!**
---
## v1.01 (21 August 2022)
- added k_lms sampling.
**Please run "conda env update -f environment.yaml" to load the k_lms dependencies!!**
- use half precision arithmetic by default, resulting in faster execution and lower memory requirements
Pass argument --full_precision to dream.py to get slower but more accurate image generation
---
## Links
- **[Read Me](../readme.md)**

141
docs/features/CHANGELOG.md Normal file
View File

@ -0,0 +1,141 @@
---
title: Changelog
---
## v1.13 <small>(in process)</small>
- Supports a Google Colab notebook for a standalone server running on Google
hardware [Arturo Mendivil](https://github.com/artmen1516)
- WebUI supports GFPGAN/ESRGAN facial reconstruction and upscaling
[Kevin Gibbons](https://github.com/bakkot)
- WebUI supports incremental display of in-progress images during generation
[Kevin Gibbons](https://github.com/bakkot)
- Output directory can be specified on the dream> command line.
- The grid was displaying duplicated images when not enough images to fill the
final row [Muhammad Usama](https://github.com/SMUsamaShah)
- Can specify --grid on dream.py command line as the default.
- Miscellaneous internal bug and stability fixes.
---
## v1.12 <small>(28 August 2022)</small>
- Improved file handling, including ability to read prompts from standard input.
(kudos to [Yunsaki](https://github.com/yunsaki))
- The web server is now integrated with the dream.py script. Invoke by adding
--web to the dream.py command arguments.
- Face restoration and upscaling via GFPGAN and Real-ESRGAN are now automatically
enabled if the GFPGAN directory is located as a sibling to Stable Diffusion.
VRAM requirements are modestly reduced. Thanks to both
[Blessedcoolant](https://github.com/blessedcoolant) and
[Oceanswave](https://github.com/oceanswave) for their work on this.
- You can now swap samplers on the dream> command line.
[Blessedcoolant](https://github.com/blessedcoolant)
---
## v1.11 <small>(26 August 2022)</small>
- NEW FEATURE: Support upscaling and face enhancement using the GFPGAN module.
(kudos to [Oceanswave](https://github.com/Oceanswave))
- You now can specify a seed of -1 to use the previous image's seed, -2 to use
the seed for the image generated before that, etc. Seed memory only extends
back to the previous command, but will work on all images generated with the
-n# switch.
- Variant generation support temporarily disabled pending more general solution.
- Created a feature branch named **yunsaki-morphing-dream** which adds
experimental support for iteratively modifying the prompt and its parameters.
Please see
[Pull Request #86](https://github.com/lstein/stable-diffusion/pull/86) for
a synopsis of how this works. Note that when this feature is eventually added
to the main branch, it may be modified significantly.
---
## v1.10 <small>(25 August 2022)</small>
- A barebones but fully functional interactive web server for online generation
of txt2img and img2img.
---
## v1.09 <small>(24 August 2022)</small>
- A new -v option allows you to generate multiple variants of an initial image
in img2img mode (kudos to [Oceanswave](https://github.com/Oceanswave)).
- [See this discussion in the PR for examples and details on use](https://github.com/lstein/stable-diffusion/pull/71#issuecomment-1226700810)
- Added ability to personalize text to image generation (kudos to
[Oceanswave](https://github.com/Oceanswave) and
[nicolai256](https://github.com/nicolai256))
- Enabled all of the samplers from k_diffusion
---
## v1.08 <small>(24 August 2022)</small>
- Escape single quotes on the dream> command before trying to parse. This avoids
parse errors.
- Removed instruction to get Python3.8 as first step in Windows install.
Anaconda3 does it for you.
- Added bounds checks for numeric arguments that could cause crashes.
- Cleaned up the copyright and license agreement files.
---
## v1.07 <small>(23 August 2022)</small>
- Image filenames will now never fill gaps in the sequence, but will be assigned
the next higher name in the chosen directory. This ensures that the alphabetic
and chronological sort orders are the same.
---
## v1.06 <small>(23 August 2022)</small>
- Added weighted prompt support contributed by
[xraxra](https://github.com/xraxra)
- Example of using weighted prompts to tweak a demonic figure contributed by
[bmaltais](https://github.com/bmaltais)
---
## v1.05 <small>(22 August 2022 - after the drop)</small>
- Filenames now use the following formats:

  ```
  000010.95183149.png            -- Two files produced by the same command (e.g. -n2),
  000010.26742632.png            -- distinguished by a different seed.
  000011.455191342.01.png        -- Two files produced by the same command using
  000011.455191342.02.png        -- a batch size>1 (e.g. -b2). They have the same seed.
  000011.4160627868.grid#1-4.png -- a grid of four images (-g); the whole grid
                                    can be regenerated with the indicated key
  ```
- It should no longer be possible for one image to overwrite another
- You can use the "cd" and "pwd" commands at the dream> prompt to set and
retrieve the path of the output directory.
## v1.04 <small>(22 August 2022 - after the drop)</small>
- Updated README to reflect installation of the released weights.
- Suppressed very noisy and inconsequential warning when loading the frozen CLIP
tokenizer.
## v1.03 <small>(22 August 2022)</small>
- The original txt2img and img2img scripts from the CompVis repository have been
moved into a subfolder named "orig_scripts", to reduce confusion.
## v1.02 <small>(21 August 2022)</small>
- A copy of the prompt and all of its switches and options is now stored in the
corresponding image in a tEXt metadata field named "Dream". You can read the
prompt using scripts/images2prompt.py, or an image editor that allows you to
explore the full metadata. **Please run "conda env update -f environment.yaml"
to load the k_lms dependencies!!**
## v1.01 <small>(21 August 2022)</small>
- added k_lms sampling. **Please run "conda env update -f environment.yaml" to
load the k_lms dependencies!!**
- use half precision arithmetic by default, resulting in faster execution and
lower memory requirements. Pass argument `--full_precision` to dream.py to get
slower but more accurate image generation.

View File

@ -1,32 +1,29 @@
# **Interactive Command-Line Interface**
---
title: CLI
---
The `dream.py` script, located in `scripts/dream.py`, provides an
interactive interface to image generation similar to the "dream
mothership" bot that Stable AI provided on its Discord server.
## **Interactive Command Line Interface**
Unlike the txt2img.py and img2img.py scripts provided in the original
CompViz/stable-diffusion source code repository, the time-consuming
initialization of the AI model initialization only happens once. After
that image generation from the command-line interface is very fast.
The `dream.py` script, located in `scripts/dream.py`, provides an interactive interface to image
generation similar to the "dream mothership" bot that Stability AI provided on its Discord server.
The script uses the readline library to allow for in-line editing,
command history (up and down arrows), autocompletion, and more. To
help keep track of which prompts generated which images, the script
writes a log file of image names and prompts to the selected output
directory.
Unlike the txt2img.py and img2img.py scripts provided in the original CompVis/stable-diffusion
source code repository, the time-consuming initialization of the AI model only happens once. After
that, image generation from the command-line interface is very fast.
In addition, as of version 1.02, it also writes the prompt into the
PNG file's metadata where it can be retrieved using
scripts/images2prompt.py
The script uses the readline library to allow for in-line editing, command history (up and down
arrows), autocompletion, and more. To help keep track of which prompts generated which images, the
script writes a log file of image names and prompts to the selected output directory.
In addition, as of version 1.02, it also writes the prompt into the PNG file's metadata where it can
be retrieved using scripts/images2prompt.py
The script is confirmed to work on Linux, Windows and Mac systems.
_Note:_ This script runs from the command-line or can be used as a Web
application. The Web GUI is currently rudimentary, but a much better
replacement is on its way.
_Note:_ This script runs from the command-line or can be used as a Web application. The Web GUI is
currently rudimentary, but a much better replacement is on its way.
```
```bash
(ldm) ~/stable-diffusion$ python3 ./scripts/dream.py
* Initializing, be patient...
Loading model from models/ldm/text2img-large/model.ckpt
@ -54,238 +51,175 @@ dream> q
<img src="../assets/dream-py-demo.png"/>
</p>
The `dream>` prompt's arguments are pretty much identical to those
used in the Discord bot, except you don't need to type "!dream" (it
doesn't hurt if you do). A significant change is that creation of
individual images is now the default unless --grid (-g) is given. A
full list is given in [List of prompt arguments]
(#list-of-prompt-arguments).
The `dream>` prompt's arguments are pretty much identical to those used in the Discord bot, except
you don't need to type "!dream" (it doesn't hurt if you do). A significant change is that creation
of individual images is now the default unless --grid (-g) is given. A full list is given in
[List of prompt arguments](#list-of-prompt-arguments).
# Arguments
## Arguments
The script itself also recognizes a series of command-line switches
that will change important global defaults, such as the directory for
image outputs and the location of the model weight files.
The script itself also recognizes a series of command-line switches that will change important
global defaults, such as the directory for image outputs and the location of the model weight files.
## List of arguments recognized at the command line:
## List of arguments recognized at the command line
These command-line arguments can be passed to dream.py when you first
run it from the Windows, Mac or Linux command line. Some set defaults
that can be overridden on a per-prompt basis (see [List of prompt
arguments] (#list-of-prompt-arguments). Others
These command-line arguments can be passed to dream.py when you first run it from the Windows, Mac
or Linux command line. Some set defaults that can be overridden on a per-prompt basis (see
[List of prompt arguments](#list-of-prompt-arguments)). Others can only be set at launch.
| Argument | Shortcut | Default | Description |
|--------------------|------------|---------------------|--------------|
| --help | -h | | Print a concise help message. |
| --outdir <path> | -o<path> | outputs/img_samples | Location for generated images. |
| --prompt_as_dir | -p | False | Name output directories using the prompt text. |
| --from_file <path> | | None | Read list of prompts from a file. Use "-" to read from standard input |
| --model <modelname>| | stable-diffusion-1.4| Loads model specified in configs/models.yaml. Currently one of "stable-diffusion-1.4" or "laion400m"|
| --full_precision | -F | False | Run in slower full-precision mode. Needed for Macintosh M1/M2 hardware and some older video cards. |
| --web | | False | Start in web server mode |
| --host <ip addr> | | localhost | Which network interface web server should listen on. Set to 0.0.0.0 to listen on any. |
| --port <port> | | 9090 | Which port web server should listen for requests on. |
| --config <path> | | configs/models.yaml | Configuration file for models and their weights. |
| --iterations <int> | -n<int> | 1 | How many images to generate per prompt. |
| --grid | -g | False | Save all image series as a grid rather than individually. |
| --sampler <sampler>| -A<sampler>| k_lms | Sampler to use. Use -h to get list of available samplers. |
| --seamless | | False | Create interesting effects by tiling elements of the image. |
| --embedding_path <path>| | None | Path to pre-trained embedding manager checkpoints, for custom models |
| --gfpgan_dir | | src/gfpgan | Path to where GFPGAN is installed. |
| --gfpgan_model_path| | experiments/pretrained_models/GFPGANv1.3.pth| Path to GFPGAN model file, relative to --gfpgan_dir. |
| --device <device> | -d<device>| torch.cuda.current_device() | Device to run SD on, e.g. "cuda:0" |
| Argument | Shortcut | Default | Description |
| :---------------------- | :---------: | ------------------------------------------------ | ---------------------------------------------------------------------------------------------------- |
| --help | -h | | Print a concise help message. |
| --outdir <path> | -o<path> | outputs/img_samples | Location for generated images. |
| --prompt_as_dir | -p | False | Name output directories using the prompt text. |
| --from_file <path> | | None | Read list of prompts from a file. Use "-" to read from standard input |
| --model <modelname> | | stable-diffusion-1.4 | Loads model specified in configs/models.yaml. Currently one of "stable-diffusion-1.4" or "laion400m" |
| --full_precision | -F | False | Run in slower full-precision mode. Needed for Macintosh M1/M2 hardware and some older video cards. |
| --web | | False | Start in web server mode |
| --host <ip addr> | | localhost | Which network interface web server should listen on. Set to 0.0.0.0 to listen on any. |
| --port <port> | | 9090 | Which port web server should listen for requests on. |
| --config <path> | | configs/models.yaml | Configuration file for models and their weights. |
| --iterations <int> | -n<int> | 1 | How many images to generate per prompt. |
| --grid | -g | False | Save all image series as a grid rather than individually. |
| --sampler <sampler> | -A<sampler> | k_lms | Sampler to use. Use -h to get list of available samplers. |
| --seamless | | False | Create interesting effects by tiling elements of the image. |
| --embedding_path <path> | | None | Path to pre-trained embedding manager checkpoints, for custom models |
| --gfpgan_dir | | src/gfpgan | Path to where GFPGAN is installed. |
| --gfpgan_model_path | | experiments/pretrained_models<br>/GFPGANv1.3.pth | Path to GFPGAN model file, relative to --gfpgan_dir. |
| --device <device> | -d<device> | torch.cuda.current_device() | Device to run SD on, e.g. "cuda:0" |
These arguments are deprecated but still work:
| Argument | Shortcut | Default | Description |
|--------------------|------------|---------------------|--------------|
| --weights <path> | | None | Pth to weights file; use `--model stable-diffusion-1.4` instead |
| --laion400m | -l | False | Use older LAION400m weights; use `--model=laion400m` instead |
| Argument | Shortcut | Default | Description |
| ---------------- | -------- | ------- | --------------------------------------------------------------- |
| --weights <path> | | None | Path to weights file; use `--model stable-diffusion-1.4` instead |
| --laion400m | -l | False | Use older LAION400m weights; use `--model=laion400m` instead |
**A note on path names:** On Windows systems, you may run into
problems when passing the dream script standard backslashed path
names because the Python interpreter treats "\" as an escape.
You can either double your slashes (ick): C:\\\\path\\\\to\\\\my\\\\file, or
use Linux/Mac style forward slashes (better): C:/path/to/my/file.
### **A note on path names:**
## List of prompt arguments
On Windows systems, you may run into problems when passing the dream script standard backslashed
path names because the Python interpreter treats "\" as an escape. You can either double your
slashes (ick): `C:\\\\path\\\\to\\\\my\\\\file`, or use Linux/Mac style forward slashes (better):
`C:/path/to/my/file`.
After the dream.py script initializes, it will present you with a
**dream>** prompt. Here you can enter information to generate images
from text (txt2img), to embellish an existing image or sketch
(img2img), or to selectively alter chosen regions of the image
(inpainting).
### List of prompt arguments
### This is an example of txt2img:
After the dream.py script initializes, it will present you with a **dream>** prompt. Here you can
enter information to generate images from text (txt2img), to embellish an existing image or sketch
(img2img), or to selectively alter chosen regions of the image (inpainting).
~~~~
dream> waterfall and rainbow -W640 -H480
~~~~
### This is an example of txt2img
This will create the requested image with the dimensions 640 (width)
and 480 (height).
```bash
dream> "waterfall and rainbow" -W640 -H480
```
Here are the dream> command that apply to txt2img:
This will create the requested image with the dimensions 640 (width) and 480 (height).
| Argument | Shortcut | Default | Description |
|--------------------|------------|---------------------|--------------|
| "my prompt" | | | Text prompt to use. The quotation marks are optional. |
| --width <int> | -W<int> | 512 | Width of generated image |
| --height <int> | -H<int> | 512 | Height of generated image |
| --iterations <int> | -n<int> | 1 | How many images to generate from this prompt |
| --steps <int> | -s<int> | 50 | How many steps of refinement to apply |
| --cfg_scale <float>| -C<float> | 7.5 | How hard to try to match the prompt to the generated image; any number greater than 0.0 works, but the useful range is roughly 5.0 to 20.0 |
| --seed <int> | -S<int> | None | Set the random seed for the next series of images. This can be used to recreate an image generated previously.|
| --sampler <sampler>| -A<sampler>| k_lms | Sampler to use. Use -h to get list of available samplers. |
| --grid | -g | False | Turn on grid mode to return a single image combining all the images generated by this prompt |
| --individual | -i | True | Turn off grid mode (deprecated; leave off --grid instead) |
| --outdir <path> | -o<path> | outputs/img_samples | Temporarily change the location of these images |
| --seamless | | False | Activate seamless tiling for interesting effects |
| --log_tokenization | -t | False | Display a color-coded list of the parsed tokens derived from the prompt |
| --skip_normalization| -x | False | Weighted subprompts will not be normalized. See [Weighted Prompts](./OTHER.md#weighted-prompts) |
| --upscale <int> <float> | -U <int> <float> | -U 1 0.75| Upscale image by magnification factor (2, 4), and set strength of upscaling (0.0-1.0). If strength not set, will default to 0.75. |
| --gfpgan_strength <float> | -G <float> | -G0 | Fix faces using the GFPGAN algorithm; argument indicates how hard the algorithm should try (0.0-1.0) |
| --save_original | -save_orig| False | When upscaling or fixing faces, this will cause the original image to be saved rather than replaced. |
| --variation <float> |-v<float>| 0.0 | Add a bit of noise (0.0=none, 1.0=high) to the image in order to generate a series of variations. Usually used in combination with -S<seed> and -n<int> to generate a series a riffs on a starting image. See [Variations](./VARIATIONS.md). |
| --with_variations <pattern> | -V<pattern>| None | Combine two or more variations. See [Variations](./VARIATIONS.md) for now to use this. |
Here are the `dream>` commands that apply to txt2img:
Note that the width and height of the image must be multiples of
64. You can provide different values, but they will be rounded down to
the nearest multiple of 64.
| Argument | Shortcut | Default | Description |
| --------------------------- | ---------------- | ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| "my prompt" | | | Text prompt to use. The quotation marks are optional. |
| --width <int> | -W<int> | 512 | Width of generated image |
| --height <int> | -H<int> | 512 | Height of generated image |
| --iterations <int> | -n<int> | 1 | How many images to generate from this prompt |
| --steps <int> | -s<int> | 50 | How many steps of refinement to apply |
| --cfg_scale <float> | -C<float> | 7.5 | How hard to try to match the prompt to the generated image; any number greater than 0.0 works, but the useful range is roughly 5.0 to 20.0 |
| --seed <int> | -S<int> | None | Set the random seed for the next series of images. This can be used to recreate an image generated previously. |
| --sampler <sampler> | -A<sampler> | k_lms | Sampler to use. Use -h to get list of available samplers. |
| --grid | -g | False | Turn on grid mode to return a single image combining all the images generated by this prompt |
| --individual | -i | True | Turn off grid mode (deprecated; leave off --grid instead) |
| --outdir <path> | -o<path> | outputs/img_samples | Temporarily change the location of these images |
| --seamless | | False | Activate seamless tiling for interesting effects |
| --log_tokenization | -t | False | Display a color-coded list of the parsed tokens derived from the prompt |
| --skip_normalization | -x | False | Weighted subprompts will not be normalized. See [Weighted Prompts](./OTHER.md#weighted-prompts) |
| --upscale <int> <float> | -U <int> <float> | -U 1 0.75 | Upscale image by magnification factor (2, 4), and set strength of upscaling (0.0-1.0). If strength not set, will default to 0.75. |
| --gfpgan_strength <float> | -G <float> | -G0 | Fix faces using the GFPGAN algorithm; argument indicates how hard the algorithm should try (0.0-1.0) |
| --save_original | -save_orig | False | When upscaling or fixing faces, this will cause the original image to be saved rather than replaced. |
| --variation <float> | -v<float> | 0.0 | Add a bit of noise (0.0=none, 1.0=high) to the image in order to generate a series of variations. Usually used in combination with -S<seed> and -n<int> to generate a series of riffs on a starting image. See [Variations](./VARIATIONS.md). |
| --with_variations <pattern> | -V<pattern> | None | Combine two or more variations. See [Variations](./VARIATIONS.md) for how to use this. |
Note that the width and height of the image must be multiples of 64. You can provide different
values, but they will be rounded down to the nearest multiple of 64.
### This is an example of img2img:
### This is an example of img2img
~~~~
```bash
dream> waterfall and rainbow -I./vacation-photo.png -W640 -H480 --fit
~~~~
```
This will modify the indicated vacation photograph by making it more
like the prompt. Results will vary greatly depending on what is in the
image. We also ask to --fit the image into a box no bigger than
640x480. Otherwise the image size will be identical to the provided
photo and you may run out of memory if it is large.
This will modify the indicated vacation photograph by making it more like the prompt. Results will
vary greatly depending on what is in the image. We also ask to --fit the image into a box no bigger
than 640x480. Otherwise the image size will be identical to the provided photo and you may run out
of memory if it is large.
In addition to the command-line options recognized by txt2img, img2img
accepts additional options:
In addition to the command-line options recognized by txt2img, img2img accepts additional options:
| Argument | Shortcut | Default | Description |
|--------------------|------------|---------------------|--------------|
| --init_img <path> | -I<path> | None | Path to the initialization image |
| --fit | -F | False | Scale the image to fit into the specified -H and -W dimensions |
| --strength <float> | -s<float> | 0.75 | How hard to try to match the prompt to the initial image. Ranges from 0.0-0.99, with higher values replacing the initial image completely.|
| Argument | Shortcut | Default | Description |
| ------------------ | --------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------ |
| --init_img <path> | -I<path> | None | Path to the initialization image |
| --fit | -F | False | Scale the image to fit into the specified -H and -W dimensions |
| --strength <float> | -f<float> | 0.75 | How hard to try to match the prompt to the initial image. Ranges from 0.0-0.99, with higher values replacing the initial image completely. |
### This is an example of inpainting:
### This is an example of inpainting
~~~~
dream> waterfall and rainbow -I./vacation-photo.png -M./vacation-mask.png -W640 -H480 --fit
~~~~
```bash
dream> "waterfall and rainbow" -I./vacation-photo.png -M./vacation-mask.png -W640 -H480 --fit
```
This will do the same thing as img2img, but image alterations will
only occur within transparent areas defined by the mask file specified
by -M. You may also supply just a single initial image with the areas
to overpaint made transparent, but you must be careful not to destroy
the pixels underneath when you create the transparent areas. See
[Inpainting](./INPAINTING.md) for details.
This will do the same thing as img2img, but image alterations will only occur within transparent
areas defined by the mask file specified by -M. You may also supply just a single initial image with
the areas to overpaint made transparent, but you must be careful not to destroy the pixels
underneath when you create the transparent areas. See [Inpainting](./INPAINTING.md) for details.
inpainting accepts all the arguments used for txt2img and img2img, as
well as the --mask (-M) argument:
Inpainting accepts all the arguments used for txt2img and img2img, as well as the --mask (-M)
argument:
| Argument | Shortcut | Default | Description |
|--------------------|------------|---------------------|--------------|
| --init_mask <path> | -M<path> | None |Path to an image the same size as the initial_image, with areas for inpainting made transparent.|
| Argument | Shortcut | Default | Description |
| ------------------ | -------- | ------- | ------------------------------------------------------------------------------------------------ |
| --init_mask <path> | -M<path> | None | Path to an image the same size as the initial_image, with areas for inpainting made transparent. |
## Command-line editing and completion
# Shortcuts
If you are on a Macintosh or Linux machine, the command-line offers convenient history tracking,
editing, and command completion.
Since one so frequently refers back to a previously-generated seed or
image, dream.py provides an easy shortcut that avoids having to cut
and paste these values.
Here's how it works. Say you generated 6 images of a man-eating snail:
~~~~
dream> man-eating snail -n6
...
>> Usage stats:
>> 6 image(s) generated in 79.85s
>> Max VRAM used for this generation: 3.36G. Current VRAM utilization:2.21G
>> Max VRAM used since script start: 3.36G
Outputs:
[1] outputs/img-samples/000210.1414805682.png: "man-eating snail" -s50 -W512 -H512 -C7.5 -Ak_lms -S1414805682
[2] outputs/img-samples/000210.3312885013.png: "man-eating snail" -s50 -W512 -H512 -C7.5 -Ak_lms -S3312885013
[3] outputs/img-samples/000210.1398528919.png: "man-eating snail" -s50 -W512 -H512 -C7.5 -Ak_lms -S1398528919
[4] outputs/img-samples/000210.92626031.png: "man-eating snail" -s50 -W512 -H512 -C7.5 -Ak_lms -S92626031
[5] outputs/img-samples/000210.1733666373.png: "man-eating snail" -s50 -W512 -H512 -C7.5 -Ak_lms -S1733666373
[6] outputs/img-samples/000210.2453524229.png: "man-eating snail" -s50 -W512 -H512 -C7.5 -Ak_lms -S2453524229
~~~~
The last image generated (with seed 2453524229) looks really good. So let's
pick that one for variation generation. Instead of cutting and pasting
the argument -S2453524229, we can simply refer to the most recent seed as
-1, and write:
~~~~
dream> man-eating snail -v0.1 -n10 -S-1
>> Reusing previous seed 2453524229
...etc...
~~~~
You can use -2 to refer to the second to last seed, -3 to the third to
last, etc. It works with both individual images and grids. However,
the numbering system only extends across the last group of images
generated and doesn't reach back to earlier commands.
The initial image (-I or --init_img) argument works in a similar
way. To use the second-to-most-recent snail image as the initial
image for an img2img render, you could refer to it as -I-2:
~~~~
dream> glowing science-fiction snail -I -2 -n4
>> Reusing previous image outputs/img-samples/000213.2150458613.png
...etc...
~~~~
# Command-line editing and completion
If you are on a Macintosh or Linux machine, the command-line offers
convenient history tracking, editing, and command completion.
- To scroll through previous commands and potentially edit/reuse them, use the up and down cursor keys.
- To edit the current command, use the left and right cursor keys to position the cursor, and then backspace, delete or insert characters.
- To scroll through previous commands and potentially edit/reuse them, use the up and down cursor
keys.
- To edit the current command, use the left and right cursor keys to position the cursor, and then
backspace, delete or insert characters.
- To move to the very beginning of the command, type CTRL-A (or command-A on the Mac)
- To move to the end of the command, type CTRL-E.
- To cut a section of the command, position the cursor where you want to start cutting and type CTRL-K.
- To cut a section of the command, position the cursor where you want to start cutting and type
CTRL-K.
- To paste a cut section back in, position the cursor where you want to paste, and type CTRL-Y
Windows users can get similar, but more limited, functionality if they
launch dream.py with the "winpty" program:
Windows users can get similar, but more limited, functionality if they launch dream.py with the
"winpty" program:
~~~
```
> winpty python scripts\dream.py
~~~
```
On the Mac and Linux platforms, when you exit dream.py, the last 1000
lines of your command-line history will be saved. When you restart
dream.py, you can access the saved history using the up-arrow key.
On the Mac and Linux platforms, when you exit dream.py, the last 1000 lines of your command-line
history will be saved. When you restart dream.py, you can access the saved history using the
up-arrow key.
In addition, limited command-line completion is installed. In various
contexts, you can start typing your command and press tab. A list of
potential completions will be presented to you. You can then type a
little more, hit tab again, and eventually autocomplete what you want.
In addition, limited command-line completion is installed. In various contexts, you can start typing
your command and press tab. A list of potential completions will be presented to you. You can then
type a little more, hit tab again, and eventually autocomplete what you want.
When specifying file paths using the one-letter shortcuts, the CLI
will attempt to complete pathnames for you. This is most handy for the
-I (init image) and -M (init mask) paths. To initiate completion, start
the path with a slash ("/") or "./". For example:
When specifying file paths using the one-letter shortcuts, the CLI will attempt to complete
pathnames for you. This is most handy for the -I (init image) and -M (init mask) paths. To initiate
completion, start the path with a slash ("/") or "./". For example:
~~~
```
dream> zebra with a mustache -I./test-pictures<TAB>
-I./test-pictures/Lincoln-and-Parrot.png -I./test-pictures/zebra.jpg -I./test-pictures/madonna.png
-I./test-pictures/bad-sketch.png -I./test-pictures/man_with_eagle/
~~~
```
You can then type "z", hit tab again, and it will autofill to "zebra.jpg".
More text completion features (such as autocompleting seeds) are on their way.

View File

@ -1,30 +1,29 @@
# **Image-to-Image**
---
title: Image-to-Image
---
This script also provides an img2img feature that lets you seed your
creations with an initial drawing or photo. This is a really cool
feature that tells stable diffusion to build the prompt on top of the
image you provide, preserving the original's basic shape and
layout. To use it, provide the `--init_img` option as shown here:
## **IMG2IMG**
This script also provides an `img2img` feature that lets you seed your creations with an initial
drawing or photo. This is a really cool feature that tells stable diffusion to build the prompt on
top of the image you provide, preserving the original's basic shape and layout. To use it, provide
the `--init_img` option as shown here:
```
```bash
dream> "waterfall and rainbow" --init_img=./init-images/crude_drawing.png --strength=0.5 -s100 -n4
```
The `--init_img (-I)` option gives the path to the seed
picture. `--strength (-f)` controls how much the original will be
modified, ranging from `0.0` (keep the original intact), to `1.0`
(ignore the original completely). The default is `0.75`, and ranges
from `0.25-0.75` give interesting results.
The `--init_img (-I)` option gives the path to the seed picture. `--strength (-f)` controls how much
the original will be modified, ranging from `0.0` (keep the original intact), to `1.0` (ignore the
original completely). The default is `0.75`, and ranges from `0.25-0.75` give interesting results.
You may also pass a `-v<variation_amount>` option to generate `-n<iterations>` count variants on
the original image. This is done by passing the first generated image
back into img2img the requested number of times. It generates
interesting variants.
If the initial image contains transparent regions, then Stable
Diffusion will only draw within the transparent regions, a process
called "inpainting". However, for this to work correctly, the color
information underneath the transparent needs to be preserved, not
erased. See [Creating Transparent Images For
Inpainting](./INPAINTING.md#creating-transparent-regions-for-inpainting)
for details.
If the initial image contains transparent regions, then Stable Diffusion will only draw within the
transparent regions, a process called "inpainting". However, for this to work correctly, the color
information underneath the transparent regions needs to be preserved, not erased.
More details can be found here:
[Creating Transparent Images For Inpainting](./INPAINTING.md#creating-transparent-regions-for-inpainting)

View File

@ -1,27 +1,27 @@
# **Creating Transparent Regions for Inpainting**
---
title: Inpainting
---
Inpainting is really cool. To do it, you start with an initial image
and use a photoeditor to make one or more regions transparent
(i.e. they have a "hole" in them). You then provide the path to this
image at the dream> command line using the `-I` switch. Stable
Diffusion will only paint within the transparent region.
## **Creating Transparent Regions for Inpainting**
There's a catch. In the current implementation, you have to prepare
the initial image correctly so that the underlying colors are
preserved under the transparent area. Many imaging editing
applications will by default erase the color information under the
transparent pixels and replace them with white or black, which will
lead to suboptimal inpainting. You also must take care to export the
PNG file in such a way that the color information is preserved.
Inpainting is really cool. To do it, you start with an initial image and use a photoeditor to make
one or more regions transparent (i.e. they have a "hole" in them). You then provide the path to this
image at the dream> command line using the `-I` switch. Stable Diffusion will only paint within the
transparent region.
If your photoeditor is erasing the underlying color information,
`dream.py` will give you a big fat warning. If you can't find a way to
coax your photoeditor to retain color values under transparent areas,
then you can combine the `-I` and `-M` switches to provide both the
original unedited image and the masked (partially transparent) image:
There's a catch. In the current implementation, you have to prepare the initial image correctly so
that the underlying colors are preserved under the transparent area. Many image editing
applications will by default erase the color information under the transparent pixels and replace
them with white or black, which will lead to suboptimal inpainting. You also must take care to
export the PNG file in such a way that the color information is preserved.
```
dream> man with cat on shoulder -I./images/man.png -M./images/man-transparent.png
If your photoeditor is erasing the underlying color information, `dream.py` will give you a big fat
warning. If you can't find a way to coax your photoeditor to retain color values under transparent
areas, then you can combine the `-I` and `-M` switches to provide both the original unedited image
and the masked (partially transparent) image:
```bash
dream> "man with cat on shoulder" -I./images/man.png -M./images/man-transparent.png
```
We are hoping to get rid of the need for this workaround in an upcoming release.
@ -37,5 +37,5 @@ We are hoping to get rid of the need for this workaround in an upcoming release.
5. Open the Layers toolbar (^L) and select "Floating Selection"
6. Set opacity to 0%
7. Export as PNG
8. In the export dialogue, Make sure the "Save colour values from
transparent pixels" checkbox is selected.
8. In the export dialogue, make sure the "Save colour values from transparent pixels" checkbox is
selected.

View File

@ -1,25 +1,28 @@
---
title: Others
---
## **Google Colab**
Stable Diffusion AI Notebook: <a
href="https://colab.research.google.com/github/lstein/stable-diffusion/blob/main/notebooks/Stable_Diffusion_AI_Notebook.ipynb"
target="_parent"><img
target="_parent">
<img
src="https://colab.research.google.com/assets/colab-badge.svg"
alt="Open In Colab"/></a> <br> Open and follow instructions to use an
isolated environment running Dream.<br>
alt="Open In Colab"/></a> <br> Open and follow instructions to use an isolated environment running
Dream.<br>
Output Example:
![Colab Notebook](../assets/colab_notebook.png)
Output Example: ![Colab Notebook](../assets/colab_notebook.png)
---
## **Seamless Tiling**
The seamless tiling mode causes generated images to seamlessly tile
with itself. To use it, add the `--seamless` option when starting the
script which will result in all generated images to tile, or for each
`dream>` prompt as shown here:
The seamless tiling mode causes generated images to tile seamlessly with themselves. To use it, add
the `--seamless` option when starting the script, which will cause all generated images to tile, or
add it to an individual `dream>` prompt as shown here:
```
```bash
dream> "pond garden with lotus by claude monet" --seamless -s100 -n4
```
@ -27,12 +30,11 @@ dream> "pond garden with lotus by claude monet" --seamless -s100 -n4
## **Reading Prompts from a File**
You can automate `dream.py` by providing a text file with the prompts
you want to run, one line per prompt. The text file must be composed
with a text editor (e.g. Notepad) and not a word processor. Each line
should look like what you would type at the dream> prompt:
You can automate `dream.py` by providing a text file with the prompts you want to run, one line per
prompt. The text file must be composed with a text editor (e.g. Notepad) and not a word processor.
Each line should look like what you would type at the dream> prompt:
```
```bash
a beautiful sunny day in the park, children playing -n4 -C10
stormy weather on a mountain top, goats grazing -s100
innovative packaging for a squid's dinner -S137038382
@ -40,13 +42,13 @@ innovative packaging for a squid's dinner -S137038382
Then pass this file's name to `dream.py` when you invoke it:
```
```bash
(ldm) ~/stable-diffusion$ python3 scripts/dream.py --from_file "path/to/prompts.txt"
```
You may read a series of prompts from standard input by providing a filename of `-`:
```
```bash
(ldm) ~/stable-diffusion$ echo "a beautiful day" | python3 scripts/dream.py --from_file -
```
@ -54,12 +56,16 @@ You may read a series of prompts from standard input by providing a filename of
## **Shortcuts: Reusing Seeds**
Since it is so common to reuse seeds while refining a prompt, there is now a shortcut as of version 1.11. Provide a `**-S**` (or `**--seed**`)
switch of `-1` to use the seed of the most recent image generated. If you produced multiple images with the `**-n**` switch, then you can go back further using -2, -3, etc. up to the first image generated by the previous command. Sorry, but you can't go back further than one command.
Since it is so common to reuse seeds while refining a prompt, there is now a shortcut as of version
1.11. Provide a **`-S`** (or **`--seed`**) switch of `-1` to use the seed of the most recent image
generated. If you produced multiple images with the **`-n`** switch, then you can go back further
using -2, -3, etc. up to the first image generated by the previous command. Sorry, but you can't go
back further than one command.
Here's an example of using this to do a quick refinement. It also illustrates using the new `**-G**` switch to turn on upscaling and face enhancement (see previous section):
Here's an example of using this to do a quick refinement. It also illustrates using the new **`-G`**
switch to turn on upscaling and face enhancement (see previous section):
```
```bash
dream> a cute child playing hopscotch -G0.5
[...]
outputs/img-samples/000039.3498014304.png: "a cute child playing hopscotch" -s50 -W512 -H512 -C7.5 -mk_lms -S3498014304
@ -76,26 +82,26 @@ outputs/img-samples/000040.3498014304.png: "a cute child playing hopscotch" -G1.
## **Weighted Prompts**
You may weight different sections of the prompt to tell the sampler to attach different levels of
priority to them, by adding `:(number)` to the end of the section you wish to up- or downweight.
For example consider this prompt:
priority to them, by adding `:(number)` to the end of the section you wish to up- or downweight. For
example consider this prompt:
```
tabby cat:0.25 white duck:0.75 hybrid
```bash
tabby cat:0.25 white duck:0.75 hybrid
```
This will tell the sampler to invest 25% of its effort on the tabby
cat aspect of the image and 75% on the white duck aspect
(surprisingly, this example actually works). The prompt weights can
use any combination of integers and floating point numbers, and they
do not need to add up to 1.
This will tell the sampler to invest 25% of its effort on the tabby cat aspect of the image and 75%
on the white duck aspect (surprisingly, this example actually works). The prompt weights can use any
combination of integers and floating point numbers, and they do not need to add up to 1.
---
## **Simplified API**
For programmers who wish to incorporate stable-diffusion into other products, this repository includes a simplified API for text to image generation, which lets you create images from a prompt in just three lines of code:
For programmers who wish to incorporate stable-diffusion into other products, this repository
includes a simplified API for text to image generation, which lets you create images from a prompt
in just three lines of code:
```
```python
from ldm.generate import Generate
g = Generate()
outputs = g.txt2img("a unicorn in manhattan")
@ -109,16 +115,14 @@ Please see ldm/generate.py for more information. A set of example scripts is com
## **Preload Models**
In situations where you have limited internet connectivity or are
blocked behind a firewall, you can use the preload script to preload
the required files for Stable Diffusion to run.
In situations where you have limited internet connectivity or are blocked behind a firewall, you can
use the preload script to preload the required files for Stable Diffusion to run.
The preload script `scripts/preload_models.py` needs to be run once at
least while connected to the internet. In the following runs, it will
load up the cached versions of the required files from the `.cache`
directory of the system.
The preload script `scripts/preload_models.py` needs to be run at least once while connected to the
internet. On subsequent runs, it will load the cached versions of the required files from the
`.cache` directory of the system.
```
```bash
(ldm) ~/stable-diffusion$ python3 ./scripts/preload_models.py
preloading bert tokenizer...
Downloading: 100%|██████████████████████████████████| 28.0/28.0 [00:00<00:00, 49.3kB/s]

View File

@ -1,70 +1,91 @@
# **Personalizing Text-to-Image Generation**
---
title: TEXTUAL_INVERSION
---
You may personalize the generated images to provide your own styles or objects by training a new LDM checkpoint and introducing a new vocabulary to the fixed model as a (.pt) embeddings file. Alternatively, you may use or train HuggingFace Concepts embeddings files (.bin) from https://huggingface.co/sd-concepts-library and its associated notebooks.
## **Personalizing Text-to-Image Generation**
**Training**
You may personalize the generated images to provide your own styles or objects
by training a new LDM checkpoint and introducing a new vocabulary to the fixed
model as a (.pt) embeddings file. Alternatively, you may use or train
HuggingFace Concepts embeddings files (.bin) from
<https://huggingface.co/sd-concepts-library> and its associated notebooks.
To train, prepare a folder that contains images sized at 512x512 and execute the following:
## **Training**
**WINDOWS**: As the default backend is not available on Windows, if you're using that platform, set the environment variable `PL_TORCH_DISTRIBUTED_BACKEND=gloo`
To train, prepare a folder that contains images sized at 512x512 and execute the
following:
```
(ldm) ~/stable-diffusion$ python3 ./main.py --base ./configs/stable-diffusion/v1-finetune.yaml \
-t \
--actual_resume ./models/ldm/stable-diffusion-v1/model.ckpt \
-n my_cat \
--gpus 0, \
--data_root D:/textual-inversion/my_cat \
--init_word 'cat'
### WINDOWS
As the default backend is not available on Windows, if you're using that
platform, set the environment variable `PL_TORCH_DISTRIBUTED_BACKEND` to `gloo`
```bash
python3 ./main.py --base ./configs/stable-diffusion/v1-finetune.yaml \
--actual_resume ./models/ldm/stable-diffusion-v1/model.ckpt \
-t \
-n my_cat \
--gpus 0, \
--data_root D:/textual-inversion/my_cat \
--init_word 'cat'
```
During the training process, files will be created in
/logs/[project][time][project]/ where you can see the process.
`/logs/[project][time][project]/` where you can see the process.
Conditioning contains the training prompts inputs, reconstruction the
input images for the training epoch samples, samples scaled for a
sample of the prompt and one with the init word provided.
Conditioning contains the training prompts inputs, reconstruction the input
images for the training epoch samples, samples scaled for a sample of the prompt
and one with the init word provided.
On an RTX3090, the process for SD will take ~1h @1.6 iterations/sec.
_Note_: According to the associated paper, the optimal number of
images is 3-5. Your model may not converge if you use more images than
that.
!!! Info _Note_
Training will run indefinitely, but you may wish to stop it (with
ctrl-c) before the heat death of the universe, when you find a low
loss epoch or around ~5000 iterations. Note that you can set a fixed
limit on the number of training steps by decreasing the "max_steps"
option in configs/stable_diffusion/v1-finetune.yaml (currently set to
4000000)
According to the associated paper, the optimal number of
images is 3-5. Your model may not converge if you use more images than
that.
**Running**
Training will run indefinitely, but you may wish to stop it (with ctrl-c) before
the heat death of the universe, when you find a low loss epoch or around ~5000
iterations. Note that you can set a fixed limit on the number of training steps
by decreasing the "max_steps" option in
`configs/stable-diffusion/v1-finetune.yaml` (currently set to 4000000).
Once the model is trained, specify the trained .pt or .bin file when
starting dream using
## **Run the Model**
```
(ldm) ~/stable-diffusion$ python3 ./scripts/dream.py --embedding_path /path/to/embedding.pt --full_precision
Once the model is trained, specify the trained .pt or .bin file when starting
dream using
```bash
python3 ./scripts/dream.py \
--embedding_path /path/to/embedding.pt \
--full_precision
```
Then, to utilize your subject at the dream prompt
```
```bash
dream> "a photo of *"
```
This also works with image2image
```
```bash
dream> "waterfall and rainbow in the style of *" --init_img=./init-images/crude_drawing.png --strength=0.5 -s100 -n4
```
For .pt files it's also possible to train multiple tokens (modify the placeholder string in `configs/stable-diffusion/v1-finetune.yaml`) and combine LDM checkpoints using:
For .pt files it's also possible to train multiple tokens (modify the
placeholder string in `configs/stable-diffusion/v1-finetune.yaml`) and combine
LDM checkpoints using:
```
(ldm) ~/stable-diffusion$ python3 ./scripts/merge_embeddings.py \
--manager_ckpts /path/to/first/embedding.pt /path/to/second/embedding.pt [...] \
--output_path /path/to/output/embedding.pt
```bash
python3 ./scripts/merge_embeddings.py \
--manager_ckpts /path/to/first/embedding.pt \
/path/to/second/embedding.pt [...] \
--output_path /path/to/output/embedding.pt
```
Credit goes to rinongal and the repository located at https://github.com/rinongal/textual_inversion Please see the repository and associated paper for details and limitations.
Credit goes to rinongal and the repository located at
<https://github.com/rinongal/textual_inversion>. Please see the repository and
associated paper for details and limitations.

View File

@ -1,105 +1,99 @@
# **GFPGAN and Real-ESRGAN Support**
---
title: Upscale
---
The script also provides the ability to do face restoration and
upscaling with the help of GFPGAN and Real-ESRGAN respectively.
## **GFPGAN and Real-ESRGAN Support**
As of version 1.14, environment.yaml will install the Real-ESRGAN package into the
standard install location for python packages, and will put GFPGAN into a subdirectory of "src"
in the stable-diffusion directory.
(The reason for this is that the standard GFPGAN distribution has a minor bug that adversely affects image
color.) Upscaling with Real-ESRGAN should "just work" without further intervention. Simply pass the --upscale (-U)
option on the dream> command line, or indicate the desired scale on the popup in the Web GUI.
The script also provides the ability to do face restoration and upscaling with the help of GFPGAN
and Real-ESRGAN respectively.
For **GFPGAN** to work, there is one additional step needed. You will need to download and
copy the GFPGAN [models file](https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth)
into **src/gfpgan/experiments/pretrained_models**. On Mac and Linux systems, here's how you'd do it using
**wget**:
~~~~
As of version 1.14, environment.yaml will install the Real-ESRGAN package into the standard install
location for python packages, and will put GFPGAN into a subdirectory of "src" in the
stable-diffusion directory. (The reason for this is that the standard GFPGAN distribution has a
minor bug that adversely affects image color.) Upscaling with Real-ESRGAN should "just work" without
further intervention. Simply pass the --upscale (-U) option on the dream> command line, or indicate
the desired scale on the popup in the Web GUI.
For **GFPGAN** to work, there is one additional step needed. You will need to download and copy the
GFPGAN [models file](https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth)
into **src/gfpgan/experiments/pretrained_models**. On Mac and Linux systems, here's how you'd do it
using **wget**:
```bash
> wget https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth -P src/gfpgan/experiments/pretrained_models/
~~~~
```
Make sure that you're in the stable-diffusion directory when you do this.
Alternatively, if you have GFPGAN installed elsewhere, or if you are using
an earlier version of this package which asked you to install GFPGAN in a
sibling directory, you may use the `--gfpgan_dir` argument with `dream.py` to set a
custom path to your GFPGAN directory. _There are other GFPGAN related
boot arguments if you wish to customize further._
Alternatively, if you have GFPGAN installed elsewhere, or if you are using an earlier version of
this package which asked you to install GFPGAN in a sibling directory, you may use the
`--gfpgan_dir` argument with `dream.py` to set a custom path to your GFPGAN directory. _There are
other GFPGAN related boot arguments if you wish to customize further._
**Note: Internet connection needed:**
Users whose GPU machines are isolated from the Internet (e.g. on a
University cluster) should be aware that the first time you run
dream.py with GFPGAN and Real-ESRGAN turned on, it will try to
download model files from the Internet. To rectify this, you may run
`python3 scripts/preload_models.py` after you have installed GFPGAN
and all its dependencies.
**Note: Internet connection needed:** Users whose GPU machines are isolated from the Internet (e.g.
on a University cluster) should be aware that the first time you run dream.py with GFPGAN and
Real-ESRGAN turned on, it will try to download model files from the Internet. To rectify this, you
may run `python3 scripts/preload_models.py` after you have installed GFPGAN and all its
dependencies.
**Usage**
## **Usage**
You will now have access to two new prompt arguments.
**Upscaling**
### **Upscaling**
`-U : <upscaling_factor> <upscaling_strength>`
The upscaling prompt argument takes two values. The first value is a
scaling factor and should be set to either `2` or `4` only. This will
either scale the image 2x or 4x respectively using different models.
The upscaling prompt argument takes two values. The first value is a scaling factor and should be
set to either `2` or `4` only. This will either scale the image 2x or 4x respectively using
different models.
You can set the scaling stength between `0` and `1.0` to control
intensity of the of the scaling. This is handy because AI upscalers
generally tend to smooth out texture details. If you wish to retain
some of those for natural looking results, we recommend using values
between `0.5 to 0.8`.
You can set the scaling strength between `0` and `1.0` to control the intensity of the scaling.
This is handy because AI upscalers generally tend to smooth out texture details. If you wish to
retain some of those for natural looking results, we recommend using values between `0.5 to 0.8`.
If you do not explicitly specify an upscaling_strength, it will
default to 0.75.
If you do not explicitly specify an upscaling_strength, it will default to 0.75.
**Face Restoration**
### **Face Restoration**
`-G : <gfpgan_strength>`
This prompt argument controls the strength of the face restoration
that is being applied. Similar to upscaling, values between `0.5 to 0.8` are recommended.
This prompt argument controls the strength of the face restoration that is being applied. Similar to
upscaling, values between `0.5 to 0.8` are recommended.
You can use either one or both without any conflicts. In cases where
you use both, the image will be first upscaled and then the face
restoration process will be executed to ensure you get the highest
You can use either one or both without any conflicts. In cases where you use both, the image will be
first upscaled and then the face restoration process will be executed to ensure you get the highest
quality facial features.
`--save_orig`
When you use either `-U` or `-G`, the final result you get is upscaled
or face modified. If you want to save the original Stable Diffusion
generation, you can use the `-save_orig` prompt argument to save the
original unaffected version too.
When you use either `-U` or `-G`, the final result you get is upscaled or face modified. If you want
to save the original Stable Diffusion generation, you can use the `--save_orig` prompt argument to
save the original unaffected version too.
**Example Usage**
### **Example Usage**
```
dream > superman dancing with a panda bear -U 2 0.6 -G 0.4
```bash
dream> superman dancing with a panda bear -U 2 0.6 -G 0.4
```
This also works with img2img:
```
```bash
dream> a man wearing a pineapple hat -I path/to/your/file.png -U 2 0.5 -G 0.6
```
**Note**
### **Note**
GFPGAN and Real-ESRGAN are both memory intensive. In order to avoid
crashes and memory overloads during the Stable Diffusion process,
these effects are applied after Stable Diffusion has completed its
work.
GFPGAN and Real-ESRGAN are both memory intensive. In order to avoid crashes and memory overloads
during the Stable Diffusion process, these effects are applied after Stable Diffusion has completed
its work.
In single image generations, you will see the output right away but
when you are using multiple iterations, the images will first be
generated and then upscaled and face restored after that process is
complete. While the image generation is taking place, you will still
be able to preview the base images.
In single image generations, you will see the output right away but when you are using multiple
iterations, the images will first be generated and then upscaled and face restored after that
process is complete. While the image generation is taking place, you will still be able to preview
the base images.
If you wish to stop during the image generation but want to upscale or
face restore a particular generated image, pass it again with the same
prompt and generated seed along with the `-U` and `-G` prompt
arguments to perform those actions.
If you wish to stop during the image generation but want to upscale or face restore a particular
generated image, pass it again with the same prompt and generated seed along with the `-U` and `-G`
prompt arguments to perform those actions.

View File

@ -1,26 +1,33 @@
# **Variations**
---
title: Variations
---
Release 1.13 of SD-Dream adds support for image variations.
You are able to do the following:
1. Generate a series of systematic variations of an image, given a prompt. The amount of variation from one image to the next can be controlled.
1. Generate a series of systematic variations of an image, given a prompt. The
amount of variation from one image to the next can be controlled.
2. Given two or more variations that you like, you can combine them in a weighted fashion.
2. Given two or more variations that you like, you can combine them in a
weighted fashion.
---
This cheat sheet provides a quick guide for how this works in practice, using variations to create the desired image of Xena, Warrior Princess.
This cheat sheet provides a quick guide for how this works in practice, using
variations to create the desired image of Xena, Warrior Princess.
---
## Step 1 -- Find a base image that you like
The prompt we will use throughout is `lucy lawless as xena, warrior princess, character portrait, high resolution.`
The prompt we will use throughout is
`lucy lawless as xena, warrior princess, character portrait, high resolution.`
This will be indicated as `prompt` in the examples below.
First we let SD create a series of images in the usual way, in this case requesting six iterations:
First we let SD create a series of images in the usual way, in this case
requesting six iterations:
```
dream> lucy lawless as xena, warrior princess, character portrait, high resolution -n6
@ -36,17 +43,18 @@ Outputs:
The one with seed 3357757885 looks nice:
<img src="../assets/variation_walkthru/000001.3357757885.png"/>
![var1](../assets/variation_walkthru/000001.3357757885.png)
---
## Step 2 - Generating Variations
Let's try to generate some variations. Using the same seed, we pass the argument `-v0.1` (or --variant_amount), which generates a series of
variations each differing by a variation amount of 0.2. This number ranges from `0` to `1.0`, with higher numbers being larger amounts of
variation.
Let's try to generate some variations. Using the same seed, we pass the argument
`-v0.2` (or `--variation_amount`), which generates a series of variations each
differing by a variation amount of 0.2. This number ranges from `0` to `1.0`,
with higher numbers being larger amounts of variation.
```
```bash
dream> "prompt" -n6 -S3357757885 -v0.2
...
Outputs:
@ -60,33 +68,41 @@ Outputs:
### **Variation Sub Seeding**
Note that the output for each image has a `-V` option giving the "variant subseed" for that image, consisting of a seed followed by the
variation amount used to generate it.
Note that the output for each image has a `-V` option giving the "variant
subseed" for that image, consisting of a seed followed by the variation amount
used to generate it.
This gives us a series of closely-related variations, including the two shown here.
This gives us a series of closely-related variations, including the two shown
here.
<img src="../assets/variation_walkthru/000002.3647897225.png">
<img src="../assets/variation_walkthru/000002.1614299449.png">
![var2](../assets/variation_walkthru/000002.3647897225.png)
I like the expression on Xena's face in the first one (subseed 3647897225), and the armor on her shoulder in the second one (subseed 1614299449). Can we combine them to get the best of both worlds?
![var3](../assets/variation_walkthru/000002.1614299449.png)
We combine the two variations using `-V` (--with_variations). Again, we must provide the seed for the originally-chosen image in order for
this to work.
I like the expression on Xena's face in the first one (subseed 3647897225), and
the armor on her shoulder in the second one (subseed 1614299449). Can we combine
them to get the best of both worlds?
```
dream> "prompt" -S3357757885 -V3647897225,0.1;1614299449,0.1
We combine the two variations using `-V` (--with_variations). Again, we must
provide the seed for the originally-chosen image in order for this to work.
```bash
dream> "prompt" -S3357757885 -V3647897225:0.1,1614299449:0.1
Outputs:
./outputs/Xena/000003.1614299449.png: "prompt" -s50 -W512 -H512 -C7.5 -Ak_lms -V 3647897225:0.1,1614299449:0.1 -S3357757885
```
Here we are providing equal weights (0.1 and 0.1) for both the subseeds. The resulting image is close, but not exactly what I wanted:
Here we are providing equal weights (0.1 and 0.1) for both the subseeds. The
resulting image is close, but not exactly what I wanted:
<img src="../assets/variation_walkthru/000003.1614299449.png">
![var4](../assets/variation_walkthru/000003.1614299449.png)
We could either try combining the images with different weights, or we can generate more variations around the almost-but-not-quite image. We do the latter, using both the `-V` (combining) and `-v` (variation strength) options. Note that we use `-n6` to generate 6 variations:
We could either try combining the images with different weights, or we can
generate more variations around the almost-but-not-quite image. We do the
latter, using both the `-V` (combining) and `-v` (variation strength) options.
Note that we use `-n6` to generate 6 variations:
```
dream> "prompt" -S3357757885 -V3647897225,0.1;1614299449,0.1 -v0.05 -n6
dream> "prompt" -S3357757885 -V3647897225:0.1,1614299449:0.1 -v0.05 -n6
Outputs:
./outputs/Xena/000004.3279757577.png: "prompt" -s50 -W512 -H512 -C7.5 -Ak_lms -V 3647897225:0.1,1614299449:0.1,3279757577:0.05 -S3357757885
./outputs/Xena/000004.2853129515.png: "prompt" -s50 -W512 -H512 -C7.5 -Ak_lms -V 3647897225:0.1,1614299449:0.1,2853129515:0.05 -S3357757885
@ -96,9 +112,11 @@ Outputs:
./outputs/Xena/000004.2183375608.png: "prompt" -s50 -W512 -H512 -C7.5 -Ak_lms -V 3647897225:0.1,1614299449:0.1,2183375608:0.05 -S3357757885
```
This produces six images, all slight variations on the combination of the chosen two images. Here's the one I like best:
This produces six images, all slight variations on the combination of the chosen
two images. Here's the one I like best:
<img src="../assets/variation_walkthru/000004.3747154981.png">
![var5](../assets/variation_walkthru/000004.3747154981.png)
As you can see, this is a very powerful tool, which when combined with subprompt weighting, gives you great control over the content and
quality of your generated images.
As you can see, this is a very powerful tool, which when combined with subprompt
weighting, gives you great control over the content and quality of your
generated images.

View File

@ -1,13 +1,19 @@
# Barebones Web Server
---
title: Barebones Web Server
---
As of version 1.10, this distribution comes with a bare bones web server (see screenshot). To use it, run the `dream.py` script by adding the `**--web**` option.
As of version 1.10, this distribution comes with a bare bones web server (see
screenshot). To use it, run the `dream.py` script by adding the `--web`
option.
```
```bash
(ldm) ~/stable-diffusion$ python3 scripts/dream.py --web
```
You can then connect to the server by pointing your web browser at http://localhost:9090, or to the network name or IP address of the server.
You can then connect to the server by pointing your web browser at
http://localhost:9090, or to the network name or IP address of the server.
Kudos to [Tesseract Cat](https://github.com/TesseractCat) for contributing this code, and to [dagf2101](https://github.com/dagf2101) for refining it.
Kudos to [Tesseract Cat](https://github.com/TesseractCat) for contributing this
code, and to [dagf2101](https://github.com/dagf2101) for refining it.
![Dream Web Server](../assets/dream_web_server.png)

View File

@ -1,68 +1,89 @@
# **Frequently Asked Questions**
---
title: F.A.Q.
---
Here are a few common installation problems and their solutions. Often these are caused by incomplete installations or crashes during the
install process.
## **Frequently-Asked-Questions**
Here are a few common installation problems and their solutions. Often these are caused by
incomplete installations or crashes during the install process.
---
**QUESTION**
### **QUESTION**
During `conda env create -f environment.yaml`, conda hangs indefinitely.
**SOLUTION**
### **SOLUTION**
Enter the stable-diffusion directory and completely remove the `src` directory and all its contents. The safest way to do this is to enter the stable-diffusion directory and give the command `git clean -f`. If this still doesn't fix the problem, try "conda clean -all" and then restart at the `conda env create` step.
Enter the stable-diffusion directory and completely remove the `src` directory and all its contents.
The safest way to do this is to enter the stable-diffusion directory and give the command
`git clean -f`. If this still doesn't fix the problem, try `conda clean --all` and then restart at
the `conda env create` step.
---
**QUESTION**
### **QUESTION**
`dream.py` crashes with the complaint that it can't find `ldm.simplet2i.py`. Or it complains that function is being passed incorrect parameters.
`dream.py` crashes with the complaint that it can't find `ldm.simplet2i.py`. Or it complains that
function is being passed incorrect parameters.
**SOLUTION**
### **SOLUTION**
Reinstall the stable diffusion modules. Enter the `stable-diffusion` directory and give the command `pip install -e .`
Reinstall the stable diffusion modules. Enter the `stable-diffusion` directory and give the command
`pip install -e .`
---
**QUESTION**
### **QUESTION**
`dream.py` dies, complaining of various missing modules, none of which starts with `ldm`.
**SOLUTION**
### **SOLUTION**
From within the `stable-diffusion` directory, run `conda env update -f environment.yaml` This is also frequently the solution to
complaints about an unknown function in a module.
From within the `stable-diffusion` directory, run `conda env update -f environment.yaml`. This is
also frequently the solution to complaints about an unknown function in a module.
---
**QUESTION**
### **QUESTION**
There's a feature or bugfix in the Stable Diffusion GitHub that you want to try out.
**SOLUTION**
### **SOLUTION**
**Main Branch**
#### **Main Branch**
If the fix/feature is on the `main` branch, enter the stable-diffusion directory and do a `git pull`.
If the fix/feature is on the `main` branch, enter the stable-diffusion directory and do a
`git pull`.
Usually this will be sufficient, but if you start to see errors about missing or incorrect modules, use the command `pip install -e .` and/or `conda env update -f environment.yaml` (These commands won't break anything.)
Usually this will be sufficient, but if you start to see errors about missing or incorrect modules,
use the command
**Sub Branch**
`pip install -e .` and/or
If the feature/fix is on a branch (e.g. "_foo-bugfix_"), the recipe is similar, but do a `git pull <name of branch>`.
`conda env update -f environment.yaml`
**Not Committed**
(These commands won't break anything.)
If the feature/fix is in a pull request that has not yet been made part of the main branch or a feature/bugfix branch, then from the page for the desired pull request, look for the line at the top that reads "_xxxx wants to merge xx commits into lstein:main from YYYYYY_". Copy the URL in YYYY. It should have the format `https://github.com/<name of contributor>/stable-diffusion/tree/<name of branch>`
#### **Sub Branch**
Then **go to the directory above stable-diffusion** and rename the directory to "_stable-diffusion.lstein_", "_stable-diffusion.old_", or anything else. You can then git clone the branch that contains the pull request:
If the feature/fix is on a branch (e.g. "_foo-bugfix_"), the recipe is similar, but do a
`git pull <name of branch>`.
```
git clone https://github.com/<name of contributor>/stable-diffusion/tree/<name
of branch>
```
#### **Not Committed**
You will need to go through the install procedure again, but it should be fast because all the dependencies are already loaded.
If the feature/fix is in a pull request that has not yet been made part of the main branch or a
feature/bugfix branch, then from the page for the desired pull request, look for the line at the top
that reads "_xxxx wants to merge xx commits into lstein:main from YYYYYY_". Copy the URL in YYYYYY. It
should have the format
---
`https://github.com/<name of contributor>/stable-diffusion/tree/<name of branch>`
Then **go to the directory above stable-diffusion** and rename the directory to
"_stable-diffusion.lstein_", "_stable-diffusion.old_", or anything else. You can then git clone the
branch that contains the pull request:
`git clone https://github.com/<name of contributor>/stable-diffusion/tree/<name of branch>`
You will need to go through the install procedure again, but it should be fast because all the
dependencies are already loaded.

19
docs/index.html Normal file
View File

@ -0,0 +1,19 @@
<!-- HTML for static distribution bundle build -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Swagger UI</title>
<link rel="stylesheet" type="text/css" href="swagger-ui/swagger-ui.css" />
<link rel="stylesheet" type="text/css" href="swagger-ui/index.css" />
<link rel="icon" type="image/png" href="swagger-ui/favicon-32x32.png" sizes="32x32" />
<link rel="icon" type="image/png" href="swagger-ui/favicon-16x16.png" sizes="16x16" />
</head>
<body>
<div id="swagger-ui"></div>
<script src="swagger-ui/swagger-ui-bundle.js" charset="UTF-8"> </script>
<script src="swagger-ui/swagger-ui-standalone-preset.js" charset="UTF-8"> </script>
<script src="swagger-ui/swagger-initializer.js" charset="UTF-8"> </script>
</body>
</html>

164
docs/index.md Normal file
View File

@ -0,0 +1,164 @@
---
title: Home
---
<!--
The docs you find here (/docs/*) are built and deployed via mkdocs. If you want to build and serve them locally, it is pretty straightforward:
```bash
pip install -r requirements-mkdocs.txt
mkdocs serve -a localhost:8080
```
-->
<h1 align='center'><b>Stable Diffusion Dream Script</b></h1>
<p align='center'>
<img src="./assets/logo.png"/>
</p>
<p align="center">
<img src="https://img.shields.io/github/last-commit/lstein/stable-diffusion?logo=Python&logoColor=green&style=for-the-badge" alt="last-commit"/>
<img src="https://img.shields.io/github/stars/lstein/stable-diffusion?logo=GitHub&style=for-the-badge" alt="stars"/>
<br>
<img src="https://img.shields.io/github/issues/lstein/stable-diffusion?logo=GitHub&style=for-the-badge" alt="issues"/>
<img src="https://img.shields.io/github/issues-pr/lstein/stable-diffusion?logo=GitHub&style=for-the-badge" alt="pull-requests"/>
</p>
This is a fork of [CompVis/stable-diffusion](https://github.com/CompVis/stable-diffusion), the open
source text-to-image generator. It provides a streamlined process with various new features and
options to aid the image generation process. It runs on Windows, Mac and Linux machines, and runs on
GPU cards with as little as 4 GB of RAM.
_Note: This fork is rapidly evolving. Please use the
[Issues](https://github.com/lstein/stable-diffusion/issues) tab to report bugs and make feature
requests. Be sure to use the provided templates. They will help diagnose issues faster._
## Installation
This fork is supported across multiple platforms. You can find individual installation instructions
below.
- [Linux](installation/INSTALL_LINUX.md)
- [Windows](installation/INSTALL_WINDOWS.md)
- [Macintosh](installation/INSTALL_MAC.md)
## Hardware Requirements
### System
You will need one of the following:
- An NVIDIA-based graphics card with 4 GB or more VRAM memory.
- An Apple computer with an M1 chip.
### Memory
- At least 12 GB Main Memory RAM.
### Disk
- At least 6 GB of free disk space for the machine learning model, Python, and all its dependencies.
### Note
If you have an Nvidia 10xx series card (e.g. the 1080ti), please run the dream script in
full-precision mode as shown below.
Similarly, specify full-precision mode on Apple M1 hardware.
To run in full-precision mode, start `dream.py` with the `--full_precision` flag:
```bash
(ldm) ~/stable-diffusion$ python scripts/dream.py --full_precision
```
## Features
### Major Features
- [Interactive Command Line Interface](features/CLI.md)
- [Image To Image](features/IMG2IMG.md)
- [Inpainting Support](features/INPAINTING.md)
- [GFPGAN and Real-ESRGAN Support](features/UPSCALE.md)
- [Seamless Tiling](features/OTHER.md#seamless-tiling)
- [Google Colab](features/OTHER.md#google-colab)
- [Web Server](features/WEB.md)
- [Reading Prompts From File](features/OTHER.md#reading-prompts-from-a-file)
- [Shortcut: Reusing Seeds](features/OTHER.md#shortcuts-reusing-seeds)
- [Weighted Prompts](features/OTHER.md#weighted-prompts)
- [Variations](features/VARIATIONS.md)
- [Personalizing Text-to-Image Generation](features/TEXTUAL_INVERSION.md)
- [Simplified API for text to image generation](features/OTHER.md#simplified-api)
### Other Features
- [Creating Transparent Regions for Inpainting](features/INPAINTING.md#creating-transparent-regions-for-inpainting)
- [Preload Models](features/OTHER.md#preload-models)
## Latest Changes
### v1.14 <small>(11 September 2022)</small>
- Memory optimizations for small-RAM cards. 512x512 now possible on 4 GB GPUs.
- Full support for Apple hardware with M1 or M2 chips.
- Add "seamless mode" for circular tiling of image. Generates beautiful effects.
([prixt](https://github.com/prixt)).
- Inpainting support.
- Improved web server GUI.
- Lots of code and documentation cleanups.
### v1.13 <small>(3 September 2022)</small>
- Support image variations (see [VARIATIONS](features/VARIATIONS.md))
([Kevin Gibbons](https://github.com/bakkot) and many contributors and reviewers)
- Supports a Google Colab notebook for a standalone server running on Google hardware
[Arturo Mendivil](https://github.com/artmen1516)
- WebUI supports GFPGAN/ESRGAN facial reconstruction and upscaling
[Kevin Gibbons](https://github.com/bakkot)
- WebUI supports incremental display of in-progress images during generation
[Kevin Gibbons](https://github.com/bakkot)
- A new configuration file scheme that allows new models (including upcoming stable-diffusion-v1.5)
to be added without altering the code. ([David Wager](https://github.com/maddavid12))
- Can specify --grid on dream.py command line as the default.
- Miscellaneous internal bug and stability fixes.
- Works on M1 Apple hardware.
- Multiple bug fixes.
For older changelogs, please visit the **[CHANGELOG](features/CHANGELOG.md)**.
## Troubleshooting
Please check out our **[Q&A](help/TROUBLESHOOT.md)** to get solutions for common installation
problems and other issues.
## Contributing
Anyone who wishes to contribute to this project, whether documentation, features, bug fixes, code
cleanup, testing, or code reviews, is very much encouraged to do so. If you are unfamiliar with how
to contribute to GitHub projects, here is a
[Getting Started Guide](https://opensource.com/article/19/7/create-pull-request-github).
A full set of contribution guidelines, along with templates, are in progress, but for now the most
important thing is to **make your pull request against the "development" branch**, and not against
"main". This will help keep public breakage to a minimum and will allow you to propose more radical
changes.
## Contributors
This fork is a combined effort of various people from across the world.
[Check out the list of all these amazing people](other/CONTRIBUTORS.md). We thank them for their
time, hard work and effort.
## Support
For support, please use this repository's GitHub Issues tracking service. Feel free to send me an
email if you use and like the script.
Original portions of the software are Copyright (c) 2020
[Lincoln D. Stein](https://github.com/lstein)
## Further Reading
Please see the original README for more information on this software and underlying algorithm,
located in the file [README-CompViz.md](other/README-CompViz.md).

View File

@ -1,89 +1,110 @@
# **Linux Installation**
---
title: Linux
---
1. You will need to install the following prerequisites if they are not already available. Use your operating system's preferred installer
1. You will need to install the following prerequisites if they are not already
available. Use your operating system's preferred installer.
- Python (version 3.8.5 recommended; higher may work)
- git
- Python (version 3.8.5 recommended; higher may work)
- git
2. Install the Python Anaconda environment manager.
```
~$ wget https://repo.anaconda.com/archive/Anaconda3-2022.05-Linux-x86_64.sh
~$ chmod +x Anaconda3-2022.05-Linux-x86_64.sh
~$ ./Anaconda3-2022.05-Linux-x86_64.sh
```
```bash
~$ wget https://repo.anaconda.com/archive/Anaconda3-2022.05-Linux-x86_64.sh
~$ chmod +x Anaconda3-2022.05-Linux-x86_64.sh
~$ ./Anaconda3-2022.05-Linux-x86_64.sh
```
After installing anaconda, you should log out of your system and log back in. If the installation
worked, your command prompt will be prefixed by the name of the current anaconda environment - `(base)`.
After installing anaconda, you should log out of your system and log back in. If
the installation worked, your command prompt will be prefixed by the name of the
current anaconda environment - `(base)`.
3. Copy the stable-diffusion source code from GitHub:
```
(base) ~$ git clone https://github.com/lstein/stable-diffusion.git
```
```bash
(base) ~$ git clone https://github.com/lstein/stable-diffusion.git
```
This will create stable-diffusion folder where you will follow the rest of the steps.
This will create a stable-diffusion folder where you will follow the rest of the
steps.
4. Enter the newly-created stable-diffusion folder. From this step forward make sure that you are working in the stable-diffusion directory!
4. Enter the newly-created stable-diffusion folder. From this step forward make
sure that you are working in the stable-diffusion directory!
```
(base) ~$ cd stable-diffusion
(base) ~/stable-diffusion$
```
```bash
(base) ~$ cd stable-diffusion
(base) ~/stable-diffusion$
```
5. Use anaconda to copy necessary python packages, create a new python environment named `ldm` and activate the environment.
5. Use anaconda to copy necessary python packages, create a new python
environment named `ldm` and activate the environment.
```
(base) ~/stable-diffusion$ conda env create -f environment.yaml
(base) ~/stable-diffusion$ conda activate ldm
(ldm) ~/stable-diffusion$
```
```bash
(base) ~/stable-diffusion$ conda env create -f environment.yaml
(base) ~/stable-diffusion$ conda activate ldm
(ldm) ~/stable-diffusion$
```
After these steps, your command prompt will be prefixed by `(ldm)` as shown above.
After these steps, your command prompt will be prefixed by `(ldm)` as shown
above.
6. Load a couple of small machine-learning models required by stable diffusion:
```
(ldm) ~/stable-diffusion$ python3 scripts/preload_models.py
```
```bash
(ldm) ~/stable-diffusion$ python3 scripts/preload_models.py
```
Note that this step is necessary because I modified the original just-in-time model loading scheme to allow the script to work on GPU machines that are not internet connected. See [Preload Models](../features/OTHER.md#preload-models)
Note that this step is necessary because I modified the original just-in-time
model loading scheme to allow the script to work on GPU machines that are not
internet connected. See [Preload Models](../features/OTHER.md#preload-models)
7. Now you need to install the weights for the stable diffusion model.
- For running with the released weights, you will first need to set up an acount with Hugging Face (https://huggingface.co).
- Use your credentials to log in, and then point your browser at https://huggingface.co/CompVis/stable-diffusion-v-1-4-original.
- You may be asked to sign a license agreement at this point.
- Click on "Files and versions" near the top of the page, and then click on the file named "sd-v1-4.ckpt". You'll be taken to a page that prompts you to click the "download" link. Save the file somewhere safe on your local machine.
- For running with the released weights, you will first need to set up an account
with [Hugging Face](https://huggingface.co).
- Use your credentials to log in, and then point your browser [here](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original).
- You may be asked to sign a license agreement at this point.
- Click on "Files and versions" near the top of the page, and then click on the
file named "sd-v1-4.ckpt". You'll be taken to a page that prompts you to click
the "download" link. Save the file somewhere safe on your local machine.
Now run the following commands from within the stable-diffusion directory. This will create a symbolic link from the stable-diffusion model.ckpt file, to the true location of the sd-v1-4.ckpt file.
Now run the following commands from within the stable-diffusion directory.
This will create a symbolic link from the stable-diffusion model.ckpt file, to
the true location of the `sd-v1-4.ckpt` file.
```
(ldm) ~/stable-diffusion$ mkdir -p models/ldm/stable-diffusion-v1
(ldm) ~/stable-diffusion$ ln -sf /path/to/sd-v1-4.ckpt models/ldm/stable-diffusion-v1/model.ckpt
```
```bash
(ldm) ~/stable-diffusion$ mkdir -p models/ldm/stable-diffusion-v1
(ldm) ~/stable-diffusion$ ln -sf /path/to/sd-v1-4.ckpt models/ldm/stable-diffusion-v1/model.ckpt
```
8. Start generating images!
```
# for the pre-release weights use the -l or --liaon400m switch
(ldm) ~/stable-diffusion$ python3 scripts/dream.py -l
```bash
# for the pre-release weights use the -l or --laion400m switch
(ldm) ~/stable-diffusion$ python3 scripts/dream.py -l
# for the post-release weights do not use the switch
(ldm) ~/stable-diffusion$ python3 scripts/dream.py
# for the post-release weights do not use the switch
(ldm) ~/stable-diffusion$ python3 scripts/dream.py
# for additional configuration switches and arguments, use -h or --help
(ldm) ~/stable-diffusion$ python3 scripts/dream.py -h
```
# for additional configuration switches and arguments, use -h or --help
(ldm) ~/stable-diffusion$ python3 scripts/dream.py -h
```
9. Subsequently, to relaunch the script, be sure to run "conda activate ldm" (step 5, second command), enter the `stable-diffusion` directory, and then launch the dream script (step 8). If you forget to activate the ldm environment, the script will fail with multiple `ModuleNotFound` errors.
9. Subsequently, to relaunch the script, be sure to run "conda activate ldm"
(step 5, second command), enter the `stable-diffusion` directory, and then
launch the dream script (step 8). If you forget to activate the ldm
environment, the script will fail with multiple `ModuleNotFound` errors.
### Updating to newer versions of the script
### Updating to newer versions of the script
This distribution is changing rapidly. If you used the `git clone` method (step 5) to download the stable-diffusion directory, then to update to the latest and greatest version, launch the Anaconda window, enter `stable-diffusion` and type:
This distribution is changing rapidly. If you used the `git clone` method
(step 3) to download the stable-diffusion directory, then to update to the
latest and greatest version, launch the Anaconda window, enter
`stable-diffusion` and type:
```
(ldm) ~/stable-diffusion$ git pull
```
```bash
(ldm) ~/stable-diffusion$ git pull
```
This will bring your local copy into sync with the remote one.
This will bring your local copy into sync with the remote one.

View File

@ -1,37 +1,41 @@
# **macOS Instructions**
---
title: macOS
---
Requirements
## Requirements
- macOS 12.3 Monterey or later
- Python
- Patience
- Apple Silicon\*
- Apple Silicon or Intel Mac
\*I haven't tested any of this on Intel Macs but I have read that one person got it to work, so Apple Silicon might not be requried.
Things have moved really fast and so these instructions change often and are
often out-of-date. One of the problems is that there are so many different ways
to run this.
Things have moved really fast and so these instructions change often
and are often out-of-date. One of the problems is that there are so
many different ways to run this.
We are trying to build a testing setup so that when we make changes it
doesn't always break.
We are trying to build a testing setup so that when we make changes it doesn't
always break.
How to (this hasn't been 100% tested yet):
First get the weights checkpoint download started - it's big:
1. Sign up at https://huggingface.co
2. Go to the [Stable diffusion diffusion model page](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original)
2. Go to the
[Stable Diffusion model page](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original)
3. Accept the terms and click Access Repository:
4. Download [sd-v1-4.ckpt (4.27 GB)](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original/blob/main/sd-v1-4.ckpt) and note where you have saved it (probably the Downloads folder)
4. Download
[sd-v1-4.ckpt (4.27 GB)](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original/blob/main/sd-v1-4.ckpt)
and note where you have saved it (probably the Downloads folder)
While that is downloading, open Terminal and run the following commands one at a time.
While that is downloading, open Terminal and run the following commands one
at a time.
```bash
# install brew (and Xcode command line tools):
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
#
# Now there are two different routes to get the Python (miniconda) environment up and running:
# 1. Alongside pyenv
# 2. No pyenv
@ -41,164 +45,235 @@ While that is downloading, open Terminal and run the following commands one at a
# NOW EITHER DO
# 1. Installing alongside pyenv
brew install pyenv-virtualenv # you might have this from before, no problem
pyenv install anaconda3-2022.05
pyenv virtualenv anaconda3-2022.05
eval "$(pyenv init -)"
pyenv activate anaconda3-2022.05
brew install pyenv-virtualenv # you might have this from before, no problem
pyenv install anaconda3-2022.05
pyenv virtualenv anaconda3-2022.05
eval "$(pyenv init -)"
pyenv activate anaconda3-2022.05
# OR,
# 2. Installing standalone
# install python 3, git, cmake, protobuf:
brew install cmake protobuf rust
# install miniconda (M1 arm64 version):
# install miniconda for M1 arm64:
curl https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-arm64.sh -o Miniconda3-latest-MacOSX-arm64.sh
/bin/bash Miniconda3-latest-MacOSX-arm64.sh
# OR install miniconda for Intel:
curl https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -o Miniconda3-latest-MacOSX-x86_64.sh
/bin/bash Miniconda3-latest-MacOSX-x86_64.sh
# EITHER WAY,
# continue from here
# clone the repo
git clone https://github.com/lstein/stable-diffusion.git
cd stable-diffusion
git clone https://github.com/lstein/stable-diffusion.git
cd stable-diffusion
#
# wait until the checkpoint file has downloaded, then proceed
#
# create symlink to checkpoint
mkdir -p models/ldm/stable-diffusion-v1/
mkdir -p models/ldm/stable-diffusion-v1/
PATH_TO_CKPT="$HOME/Downloads" # or wherever you saved sd-v1-4.ckpt
PATH_TO_CKPT="$HOME/Downloads" # or wherever you saved sd-v1-4.ckpt
ln -s "$PATH_TO_CKPT/sd-v1-4.ckpt" models/ldm/stable-diffusion-v1/model.ckpt
ln -s "$PATH_TO_CKPT/sd-v1-4.ckpt" models/ldm/stable-diffusion-v1/model.ckpt
# install packages
# install packages for arm64
PIP_EXISTS_ACTION=w CONDA_SUBDIR=osx-arm64 conda env create -f environment-mac.yaml
conda activate ldm
# OR install packages for x86_64
PIP_EXISTS_ACTION=w CONDA_SUBDIR=osx-x86_64 conda env create -f environment-mac.yaml
conda activate ldm
# only need to do this once
python scripts/preload_models.py
# run SD!
python scripts/dream.py --full_precision # half-precision requires autocast and won't work
# or run the web interface!
python scripts/dream.py --web
```
The original scripts should work as well.
```
```bash
python scripts/orig_scripts/txt2img.py --prompt "a photograph of an astronaut riding a horse" --plms
```
Note, `export PIP_EXISTS_ACTION=w` is a precaution to fix `conda env
create -f environment-mac.yaml` never finishing in some situations. So
it isn't required but wont hurt.
Note,
After you follow all the instructions and run dream.py you might get several errors. Here's the errors I've seen and found solutions for.
```bash
export PIP_EXISTS_ACTION=w
```
is a precaution to fix
```bash
conda env create -f environment-mac.yaml
```
never finishing in some situations. So it isn't required but won't hurt.
After you follow all the instructions and run dream.py you might get several
errors. Here are the errors I've seen and found solutions for.
---
### Is it slow?
Be sure to specify 1 sample and 1 iteration.
python ./scripts/orig_scripts/txt2img.py --prompt "ocean" --ddim_steps 5 --n_samples 1 --n_iter 1
```bash
python ./scripts/orig_scripts/txt2img.py \
--prompt "ocean" \
--ddim_steps 5 \
--n_samples 1 \
--n_iter 1
```
---
### Doesn't work anymore?
PyTorch nightly includes support for MPS. Because of this, this setup is inherently unstable. One morning I woke up and it no longer worked no matter what I did until I switched to miniforge. However, I have another Mac that works just fine with Anaconda. If you can't get it to work, please search a little first because many of the errors will get posted and solved. If you can't find a solution please [create an issue](https://github.com/lstein/stable-diffusion/issues).
PyTorch nightly includes support for MPS. Because of this, this setup is
inherently unstable. One morning I woke up and it no longer worked no matter
what I did until I switched to miniforge. However, I have another Mac that works
just fine with Anaconda. If you can't get it to work, please search a little
first because many of the errors will get posted and solved. If you can't find a
solution please
[create an issue](https://github.com/lstein/stable-diffusion/issues).
One debugging step is to update to the latest version of PyTorch nightly.
conda install pytorch torchvision torchaudio -c pytorch-nightly
```bash
conda install pytorch torchvision torchaudio -c pytorch-nightly
```
If `conda env create -f environment-mac.yaml` takes forever run this.
If it takes forever to run
git clean -f
```bash
conda env create -f environment-mac.yaml
```
And run this.
you could try to run `git clean -f` followed by:
conda clean --yes --all
`conda clean --yes --all`
Or you could reset Anaconda.
Or you could try to completely reset Anaconda:
conda update --force-reinstall -y -n base -c defaults conda
```bash
conda update --force-reinstall -y -n base -c defaults conda
```
### "No module named cv2", torch, 'ldm', 'transformers', 'taming', etc.
---
### "No module named cv2", torch, 'ldm', 'transformers', 'taming', etc
There are several causes of these errors.
First, did you remember to `conda activate ldm`? If your terminal prompt
begins with "(ldm)" then you activated it. If it begins with "(base)"
or something else you haven't.
- First, did you remember to `conda activate ldm`? If your terminal prompt
begins with "(ldm)" then you activated it. If it begins with "(base)" or
something else you haven't.
Second, you might've run `./scripts/preload_models.py` or `./scripts/dream.py`
instead of `python ./scripts/preload_models.py` or `python ./scripts/dream.py`.
The cause of this error is long so it's below.
- Second, you might've run `./scripts/preload_models.py` or `./scripts/dream.py`
instead of `python ./scripts/preload_models.py` or
`python ./scripts/dream.py`. The cause of this error is long so it's below.
Third, if it says you're missing taming you need to rebuild your virtual
environment.
- Third, if it says you're missing taming you need to rebuild your virtual
environment.
conda env remove -n ldm
conda env create -f environment-mac.yaml
```bash
conda deactivate
Fourth, If you have activated the ldm virtual environment and tried rebuilding it, maybe the problem could be that I have something installed that you don't and you'll just need to manually install it. Make sure you activate the virtual environment so it installs there instead of
globally.
conda env remove -n ldm
PIP_EXISTS_ACTION=w CONDA_SUBDIR=osx-arm64 conda env create -f environment-mac.yaml
```
conda activate ldm
pip install *name*
Fourth, If you have activated the ldm virtual environment and tried rebuilding
it, maybe the problem could be that I have something installed that you don't
and you'll just need to manually install it. Make sure you activate the virtual
environment so it installs there instead of globally.
`conda activate ldm` and then `pip install <name>`
You might also need to install Rust (I mention this again below).
---
### How many snakes are living in your computer?
You might have multiple Python installations on your system, in which case it's
important to be explicit and consistent about which one to use for a given project.
This is because virtual environments are coupled to the Python that created it (and all
the associated 'system-level' modules).
important to be explicit and consistent about which one to use for a given
project. This is because virtual environments are coupled to the Python that
created them (and all the associated 'system-level' modules).
When you run `python` or `python3`, your shell searches the colon-delimited locations
in the `PATH` environment variable (`echo $PATH` to see that list) in that order - first match wins.
You can ask for the location of the first `python3` found in your `PATH` with the `which` command like this:
When you run `python` or `python3`, your shell searches the colon-delimited
locations in the `PATH` environment variable (`echo $PATH` to see that list) in
that order - first match wins. You can ask for the location of the first
`python3` found in your `PATH` with the `which` command like this:
% which python3
/usr/bin/python3
```bash
% which python3
/usr/bin/python3
```
Anything in `/usr/bin` is [part of the OS](https://developer.apple.com/library/archive/documentation/FileManagement/Conceptual/FileSystemProgrammingGuide/FileSystemOverview/FileSystemOverview.html#//apple_ref/doc/uid/TP40010672-CH2-SW6). However, `/usr/bin/python3` is not actually python3, but
rather a stub that offers to install Xcode (which includes python 3). If you have Xcode installed already,
`/usr/bin/python3` will execute `/Library/Developer/CommandLineTools/usr/bin/python3` or
Anything in `/usr/bin` is
[part of the OS](https://developer.apple.com/library/archive/documentation/FileManagement/Conceptual/FileSystemProgrammingGuide/FileSystemOverview/FileSystemOverview.html#//apple_ref/doc/uid/TP40010672-CH2-SW6).
However, `/usr/bin/python3` is not actually python3, but rather a stub that
offers to install Xcode (which includes python 3). If you have Xcode installed
already, `/usr/bin/python3` will execute
`/Library/Developer/CommandLineTools/usr/bin/python3` or
`/Applications/Xcode.app/Contents/Developer/usr/bin/python3` (depending on which
Xcode you've selected with `xcode-select`).
Note that `/usr/bin/python` is an entirely different python - specifically, python 2. Note: starting in
macOS 12.3, `/usr/bin/python` no longer exists.
Note that `/usr/bin/python` is an entirely different python - specifically,
python 2. Note: starting in macOS 12.3, `/usr/bin/python` no longer exists.
% which python3
/opt/homebrew/bin/python3
```bash
% which python3
/opt/homebrew/bin/python3
```
If you installed python3 with Homebrew and you've modified your path to search
for Homebrew binaries before system ones, you'll see the above path.
% which python
/opt/anaconda3/bin/python
```bash
% which python
/opt/anaconda3/bin/python
```
If you have Anaconda installed, you will see the above path. There is a
`/opt/anaconda3/bin/python3` also. We expect that `/opt/anaconda3/bin/python`
and `/opt/anaconda3/bin/python3` should actually be the *same python*, which you can
verify by comparing the output of `python3 -V` and `python -V`.
`/opt/anaconda3/bin/python3` also.
(ldm) % which python
/Users/name/miniforge3/envs/ldm/bin/python
We expect that `/opt/anaconda3/bin/python` and `/opt/anaconda3/bin/python3`
should actually be the _same python_, which you can verify by comparing the
output of `python3 -V` and `python -V`.
The above is what you'll see if you have miniforge and you've correctly activated
the ldm environment, and you used option 2 in the setup instructions above ("no pyenv").
```bash
(ldm) % which python
/Users/name/miniforge3/envs/ldm/bin/python
```
The above is what you'll see if you have miniforge and you've correctly
activated the ldm environment, and you used option 2 in the setup instructions
above ("no pyenv").
```bash
(anaconda3-2022.05) % which python
/Users/name/.pyenv/shims/python
```
(anaconda3-2022.05) % which python
/Users/name/.pyenv/shims/python
... and the above is what you'll see if you used option 1 ("Alongside pyenv").
It's all a mess and you should know [how to modify the path environment variable](https://support.apple.com/guide/terminal/use-environment-variables-apd382cc5fa-4f58-4449-b20a-41c53c006f8f/mac)
It's all a mess and you should know
[how to modify the path environment variable](https://support.apple.com/guide/terminal/use-environment-variables-apd382cc5fa-4f58-4449-b20a-41c53c006f8f/mac)
if you want to fix it. Here's a brief hint of all the ways you can modify it
(don't really have the time to explain it all here).
@ -211,18 +286,19 @@ if you want to fix it. Here's a brief hint of all the ways you can modify it
Which one you use will depend on what you have installed, though putting a file
in `/etc/paths.d` is what I prefer to do.
Finally, to answer the question posed by this section's title, it may help to list
all of the `python` / `python3` things found in `$PATH` instead of just the one that
will be executed by default. To do that, add the `-a` switch to `which`:
Finally, to answer the question posed by this section's title, it may help to
list all of the `python` / `python3` things found in `$PATH` instead of just the
one that will be executed by default. To do that, add the `-a` switch to
`which`:
% which -a python3
...
### Debugging?
Tired of waiting for your renders to finish before you can see if it
works? Reduce the steps! The image quality will be horrible but at least you'll
get quick feedback.
Tired of waiting for your renders to finish before you can see if it works?
Reduce the steps! The image quality will be horrible but at least you'll get
quick feedback.
python ./scripts/txt2img.py --prompt "ocean" --ddim_steps 5 --n_samples 1 --n_iter 1
@ -235,15 +311,24 @@ get quick feedback.
Example error.
```
...
NotImplementedError: The operator 'aten::_index_put_impl_' is not current implemented for the MPS device. If you want this op to be added in priority during the prototype phase of this feature, please comment on [https://github.com/pytorch/pytorch/issues/77764](https://github.com/pytorch/pytorch/issues/77764). As a temporary fix, you can set the environment variable `PYTORCH_ENABLE_MPS_FALLBACK=1` to use the CPU as a fallback for this op. WARNING: this will be slower than running natively on MPS.
... NotImplementedError: The operator 'aten::_index_put_impl_' is not current
implemented for the MPS device. If you want this op to be added in priority
during the prototype phase of this feature, please comment on
[https://github.com/pytorch/pytorch/issues/77764](https://github.com/pytorch/pytorch/issues/77764).
As a temporary fix, you can set the environment variable
`PYTORCH_ENABLE_MPS_FALLBACK=1` to use the CPU as a fallback for this op.
WARNING: this will be slower than running natively on MPS.
```
The lstein branch includes this fix in [environment-mac.yaml](https://github.com/lstein/stable-diffusion/blob/main/environment-mac.yaml).
The lstein branch includes this fix in
[environment-mac.yaml](https://github.com/lstein/stable-diffusion/blob/main/environment-mac.yaml).
### "Could not build wheels for tokenizers"
I have not seen this error because I had Rust installed on my computer before I started playing with Stable Diffusion. The fix is to install Rust.
I have not seen this error because I had Rust installed on my computer before I
started playing with Stable Diffusion. The fix is to install Rust.
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
@ -251,10 +336,9 @@ I have not seen this error because I had Rust installed on my computer before I
First this:
> Completely reproducible results are not guaranteed across PyTorch
> releases, individual commits, or different platforms. Furthermore,
> results may not be reproducible between CPU and GPU executions, even
> when using identical seeds.
> Completely reproducible results are not guaranteed across PyTorch releases,
> individual commits, or different platforms. Furthermore, results may not be
> reproducible between CPU and GPU executions, even when using identical seeds.
[PyTorch docs](https://pytorch.org/docs/stable/notes/randomness.html)
@ -265,53 +349,56 @@ still working on it.
OMP: Error #15: Initializing libiomp5.dylib, but found libomp.dylib already initialized.
You are likely using an Intel package by mistake. Be sure to run conda with
the environment variable `CONDA_SUBDIR=osx-arm64`, like so:
You are likely using an Intel package by mistake. Be sure to run conda with the
environment variable `CONDA_SUBDIR=osx-arm64`, like so:
`CONDA_SUBDIR=osx-arm64 conda install ...`
This error happens with Anaconda on Macs when the Intel-only `mkl` is pulled in by
a dependency. [nomkl](https://stackoverflow.com/questions/66224879/what-is-the-nomkl-python-package-used-for)
This error happens with Anaconda on Macs when the Intel-only `mkl` is pulled in
by a dependency.
[nomkl](https://stackoverflow.com/questions/66224879/what-is-the-nomkl-python-package-used-for)
is a metapackage designed to prevent this, by making it impossible to install
`mkl`, but if your environment is already broken it may not work.
Do _not_ use `os.environ['KMP_DUPLICATE_LIB_OK']='True'` or equivalents as this
masks the underlying issue of using Intel packages.
### Not enough memory.
### Not enough memory
This seems to be a common problem and is probably the underlying
problem for a lot of symptoms (listed below). The fix is to lower your
image size or to add `model.half()` right after the model is loaded. I
should probably test it out. I've read that the reason this fixes
problems is because it converts the model from 32-bit to 16-bit and
that leaves more RAM for other things. I have no idea how that would
affect the quality of the images though.
This seems to be a common problem and is probably the underlying problem for a
lot of symptoms (listed below). The fix is to lower your image size or to add
`model.half()` right after the model is loaded. I should probably test it out.
I've read that the reason this fixes problems is because it converts the model
from 32-bit to 16-bit and that leaves more RAM for other things. I have no idea
how that would affect the quality of the images though.
See [this issue](https://github.com/CompVis/stable-diffusion/issues/71).
### "Error: product of dimension sizes > 2\*\*31'"
This error happens with img2img, which I haven't played with too much
yet. But I know it's because your image is too big or the resolution
isn't a multiple of 32x32. Because the stable-diffusion model was
trained on images that were 512 x 512, it's always best to use that
output size (which is the default). However, if you're using that size
and you get the above error, try 256 x 256 or 512 x 256 or something
as the source image.
This error happens with img2img, which I haven't played with too much yet. But I
know it's because your image is too big or the resolution isn't a multiple of
32x32. Because the stable-diffusion model was trained on images that were 512 x
512, it's always best to use that output size (which is the default). However,
if you're using that size and you get the above error, try 256 x 256 or 512 x
256 or something as the source image.
BTW, 2\*\*31-1 = [2,147,483,647](https://en.wikipedia.org/wiki/2,147,483,647#In_computing), which is also 32-bit signed [LONG_MAX](https://en.wikipedia.org/wiki/C_data_types) in C.
BTW, 2\*\*31-1 =
[2,147,483,647](https://en.wikipedia.org/wiki/2,147,483,647#In_computing), which
is also 32-bit signed [LONG_MAX](https://en.wikipedia.org/wiki/C_data_types) in
C.
### I just got Rickrolled! Do I have a virus?
You don't have a virus. It's part of the project. Here's
[Rick](https://github.com/lstein/stable-diffusion/blob/main/assets/rick.jpeg)
and here's [the
code](https://github.com/lstein/stable-diffusion/blob/69ae4b35e0a0f6ee1af8bb9a5d0016ccb27e36dc/scripts/txt2img.py#L79)
that swaps him in. It's a NSFW filter, which IMO, doesn't work very
well (and we call this "computer vision", sheesh).
and here's
[the code](https://github.com/lstein/stable-diffusion/blob/69ae4b35e0a0f6ee1af8bb9a5d0016ccb27e36dc/scripts/txt2img.py#L79)
that swaps him in. It's a NSFW filter, which IMO, doesn't work very well (and we
call this "computer vision", sheesh).
Actually, this could be happening because there's not enough RAM. You could try the `model.half()` suggestion or specify smaller output images.
Actually, this could be happening because there's not enough RAM. You could try
the `model.half()` suggestion or specify smaller output images.
### My images come out black
@ -319,31 +406,29 @@ We might have this fixed, we are still testing.
There's a [similar issue](https://github.com/CompVis/stable-diffusion/issues/69)
on CUDA GPUs where the images come out green. Maybe it's the same issue?
Someone in that issue says to use "--precision full", but this fork
actually disables that flag. I don't know why, someone else provided
that code and I don't know what it does. Maybe the `model.half()`
suggestion above would fix this issue too. I should probably test it.
Someone in that issue says to use "--precision full", but this fork actually
disables that flag. I don't know why, someone else provided that code and I
don't know what it does. Maybe the `model.half()` suggestion above would fix
this issue too. I should probably test it.
### "view size is not compatible with input tensor's size and stride"
```
File "/opt/anaconda3/envs/ldm/lib/python3.10/site-packages/torch/nn/functional.py", line 2511, in layer_norm
return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
```bash
File "/opt/anaconda3/envs/ldm/lib/python3.10/site-packages/torch/nn/functional.py", line 2511, in layer_norm
return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.
```
Update to the latest version of lstein/stable-diffusion. We were
patching pytorch but we found a file in stable-diffusion that we could
change instead. This is a 32-bit vs 16-bit problem.
Update to the latest version of lstein/stable-diffusion. We were patching
pytorch but we found a file in stable-diffusion that we could change instead.
This is a 32-bit vs 16-bit problem.
---
### The processor must support the Intel bla bla bla
What? Intel? On an Apple Silicon?
Intel MKL FATAL ERROR: This system does not meet the minimum requirements for use of the Intel(R) Math Kernel Library.
The processor must support the Intel(R) Supplemental Streaming SIMD Extensions 3 (Intel(R) SSSE3) instructions.
The processor must support the Intel(R) Streaming SIMD Extensions 4.2 (Intel(R) SSE4.2) instructions.
The processor must support the Intel(R) Advanced Vector Extensions (Intel(R) AVX) instructions.
```bash
Intel MKL FATAL ERROR: This system does not meet the minimum requirements for use of the Intel(R) Math Kernel Library.
The processor must support the Intel(R) Supplemental Streaming SIMD Extensions 3 (Intel(R) SSSE3) instructions.
The processor must support the Intel(R) Streaming SIMD Extensions 4.2 (Intel(R) SSE4.2) instructions.
The processor must support the Intel(R) Advanced Vector Extensions (Intel(R) AVX) instructions.
```
This is due to the Intel `mkl` package getting picked up when you try to install
something that depends on it-- Rosetta can translate some Intel instructions but
@ -351,11 +436,13 @@ not the specialized ones here. To avoid this, make sure to use the environment
variable `CONDA_SUBDIR=osx-arm64`, which restricts the Conda environment to only
use ARM packages, and use `nomkl` as described above.
---
### input types 'tensor<2x1280xf32>' and 'tensor<\*xf16>' are not broadcast compatible
May appear when just starting to generate, e.g.:
```
```bash
dream> clouds
Generating: 0%| | 0/1 [00:00<?, ?it/s]/Users/[...]/dev/stable-diffusion/ldm/modules/embedding_manager.py:152: UserWarning: The operator 'aten::nonzero' is not currently supported on the MPS backend and will fall back to run on the CPU. This may have performance implications. (Triggered internally at /Users/runner/work/_temp/anaconda/conda-bld/pytorch_1662016319283/work/aten/src/ATen/mps/MPSFallback.mm:11.)
placeholder_idx = torch.where(
@ -366,4 +453,5 @@ Abort trap: 6
warnings.warn('resource_tracker: There appear to be %d '
```
Macs do not support autocast/mixed-precision. Supply `--full_precision` to use float32 everywhere.
Macs do not support `autocast/mixed-precision`, so you need to supply
`--full_precision` to use float32 everywhere.

View File

@ -1,110 +1,135 @@
# **Windows Installation**
---
title: Windows
---
## **Notebook install (semi-automated)**
We have a [Jupyter
notebook](https://github.com/lstein/stable-diffusion/blob/main/notebooks/Stable-Diffusion-local-Windows.ipynb)
with cell-by-cell installation steps. It will download the code in
this repo as one of the steps, so instead of cloning this repo, simply
download the notebook from the link above and load it up in VSCode
(with the appropriate extensions installed)/Jupyter/JupyterLab and
start running the cells one-by-one.
We have a
[Jupyter notebook](https://github.com/lstein/stable-diffusion/blob/main/notebooks/Stable-Diffusion-local-Windows.ipynb)
with cell-by-cell installation steps. It will download the code in this repo as
one of the steps, so instead of cloning this repo, simply download the notebook
from the link above and load it up in VSCode (with the appropriate extensions
installed)/Jupyter/JupyterLab and start running the cells one-by-one.
Note that you will need NVIDIA drivers, Python 3.10, and Git installed
beforehand - simplified [step-by-step
instructions](https://github.com/lstein/stable-diffusion/wiki/Easy-peasy-Windows-install)
beforehand - simplified
[step-by-step instructions](https://github.com/lstein/stable-diffusion/wiki/Easy-peasy-Windows-install)
are available in the wiki (you'll only need steps 1, 2, & 3).
## **Manual Install**
### **pip**
See [Easy-peasy Windows install](https://github.com/lstein/stable-diffusion/wiki/Easy-peasy-Windows-install)
See
[Easy-peasy Windows install](https://github.com/lstein/stable-diffusion/wiki/Easy-peasy-Windows-install)
in the wiki
---
### **Conda**
1. Install Anaconda3 (miniconda3 version) from here: https://docs.anaconda.com/anaconda/install/windows/
1. Install Anaconda3 (miniconda3 version) from here:
https://docs.anaconda.com/anaconda/install/windows/
2. Install Git from here: https://git-scm.com/download/win
3. Launch Anaconda from the Windows Start menu. This will bring up a command window. Type all the remaining commands in this window.
3. Launch Anaconda from the Windows Start menu. This will bring up a command
window. Type all the remaining commands in this window.
4. Run the command:
```
git clone https://github.com/lstein/stable-diffusion.git
```
```bash
git clone https://github.com/lstein/stable-diffusion.git
```
This will create a stable-diffusion folder where you will follow the rest of the steps.
This will create a stable-diffusion folder where you will follow the rest of
the steps.
5. Enter the newly-created stable-diffusion folder. From this step forward make sure that you are working in the stable-diffusion directory!
5. Enter the newly-created stable-diffusion folder. From this step forward make
sure that you are working in the stable-diffusion directory!
```
cd stable-diffusion
```
```bash
cd stable-diffusion
```
6. Run the following two commands:
```
conda env create -f environment.yaml (step 6a)
conda activate ldm (step 6b)
```
```bash
conda env create -f environment.yaml (step 6a)
conda activate ldm (step 6b)
```
This will install all python requirements and activate the "ldm"
environment which sets PATH and other environment variables properly.
This will install all python requirements and activate the "ldm" environment
which sets PATH and other environment variables properly.
7. Run the command:
```
python scripts\preload_models.py
```
```bash
python scripts\preload_models.py
```
This installs several machine learning models that stable diffusion requires.
This installs several machine learning models that stable diffusion requires.
Note: This step is required. This was done because some users might be blocked by firewalls or have limited internet connectivity for the models to be downloaded just-in-time.
Note: This step is required. This was done because some users might be
blocked by firewalls or have limited internet connectivity for the models to
be downloaded just-in-time.
8. Now you need to install the weights for the big stable diffusion model.
- For running with the released weights, you will first need to set up an account with Hugging Face (https://huggingface.co).
- Use your credentials to log in, and then point your browser at https://huggingface.co/CompVis/stable-diffusion-v-1-4-original.
- You may be asked to sign a license agreement at this point.
- Click on "Files and versions" near the top of the page, and then click on the file named `sd-v1-4.ckpt`. You'll be taken to a page that
prompts you to click the "download" link. Now save the file somewhere safe on your local machine.
- The weight file is >4 GB in size, so
downloading may take a while.
- For running with the released weights, you will first need to set up an
account with Hugging Face (https://huggingface.co).
- Use your credentials to log in, and then point your browser at
https://huggingface.co/CompVis/stable-diffusion-v-1-4-original.
- You may be asked to sign a license agreement at this point.
- Click on "Files and versions" near the top of the page, and then click on
the file named `sd-v1-4.ckpt`. You'll be taken to a page that prompts you
to click the "download" link. Now save the file somewhere safe on your
local machine.
- The weight file is >4 GB in size, so downloading may take a while.
Now run the following commands from **within the stable-diffusion directory** to copy the weights file to the right place:
Now run the following commands from **within the stable-diffusion directory**
to copy the weights file to the right place:
```
mkdir -p models\ldm\stable-diffusion-v1
copy C:\path\to\sd-v1-4.ckpt models\ldm\stable-diffusion-v1\model.ckpt
```
```bash
mkdir -p models\ldm\stable-diffusion-v1
copy C:\path\to\sd-v1-4.ckpt models\ldm\stable-diffusion-v1\model.ckpt
```
Please replace `C:\path\to\sd-v1-4.ckpt` with the correct path to wherever you stashed this file. If you prefer not to copy or move the .ckpt file,
you may instead create a shortcut to it from within `models\ldm\stable-diffusion-v1\`.
Please replace `C:\path\to\sd-v1-4.ckpt` with the correct path to wherever
you stashed this file. If you prefer not to copy or move the .ckpt file, you
may instead create a shortcut to it from within
`models\ldm\stable-diffusion-v1\`.
9. Start generating images!
```
# for the pre-release weights
python scripts\dream.py -l
```bash
# for the pre-release weights
python scripts\dream.py -l
# for the post-release weights
python scripts\dream.py
```
# for the post-release weights
python scripts\dream.py
```
10. Subsequently, to relaunch the script, first activate the Anaconda command window (step 3), enter the stable-diffusion directory (step 5, `cd \path\to\stable-diffusion`), run `conda activate ldm` (step 6b), and then launch the dream script (step 9).
10. Subsequently, to relaunch the script, first activate the Anaconda command
window (step 3), enter the stable-diffusion directory (step 5,
`cd \path\to\stable-diffusion`), run `conda activate ldm` (step 6b), and
then launch the dream script (step 9).
**Note:** Tildebyte has written an alternative ["Easy peasy Windows
install"](https://github.com/lstein/stable-diffusion/wiki/Easy-peasy-Windows-install)
which uses the Windows Powershell and pew. If you are having trouble with Anaconda on Windows, give this a try (or try it first!)
**Note:** Tildebyte has written an alternative
["Easy peasy Windows install"](https://github.com/lstein/stable-diffusion/wiki/Easy-peasy-Windows-install)
which uses the Windows Powershell and pew. If you are having trouble with
Anaconda on Windows, give this a try (or try it first!)
---
### Updating to newer versions of the script
This distribution is changing rapidly. If you used the `git clone` method (step 4) to download the stable-diffusion directory, then to update to the latest and greatest version, launch the Anaconda window, enter `stable-diffusion`, and type:
This distribution is changing rapidly. If you used the `git clone` method
(step 4) to download the stable-diffusion directory, then to update to the
latest and greatest version, launch the Anaconda window, enter
`stable-diffusion`, and type:
```
```bash
git pull
conda env update -f environment.yaml
```

73
docs/openapi3_0.yaml Normal file
View File

@ -0,0 +1,73 @@
openapi: 3.0.3
info:
title: Stable Diffusion
description: |-
TODO: Description Here
Some useful links:
- [Stable Diffusion Dream Server](https://github.com/lstein/stable-diffusion)
license:
name: MIT License
url: https://github.com/lstein/stable-diffusion/blob/main/LICENSE
version: 1.0.0
servers:
- url: http://localhost:9090/api
tags:
- name: images
description: Retrieve and manage generated images
paths:
/images/{imageId}:
get:
tags:
- images
summary: Get image by ID
description: Returns a single image
operationId: getImageById
parameters:
- name: imageId
in: path
description: ID of image to return
required: true
schema:
type: string
responses:
'200':
description: successful operation
content:
image/png:
schema:
type: string
format: binary
'404':
description: Image not found
/intermediates/{intermediateId}/{step}:
get:
tags:
- images
summary: Get intermediate image by ID
description: Returns a single intermediate image
operationId: getIntermediateById
parameters:
- name: intermediateId
in: path
description: ID of intermediate to return
required: true
schema:
type: string
- name: step
in: path
description: The generation step of the intermediate
required: true
schema:
type: string
responses:
'200':
description: successful operation
content:
image/png:
schema:
type: string
format: binary
'404':
description: Intermediate not found

View File

@ -1,14 +1,16 @@
# Contributors
---
title: Contributors
---
The list of all the amazing people who have contributed to the various features that you get to experience in this fork.
We thank them for all of their time and hard work.
_Original Author:_
## __Original Author:__
- Lincoln D. Stein <lincoln.stein@gmail.com>
- [Lincoln D. Stein](mailto:lincoln.stein@gmail.com)
_Contributions by:_
## __Contributions by:__
- [Sean McLellan](https://github.com/Oceanswave)
- [Kevin Gibbons](https://github.com/bakkot)
@ -48,8 +50,10 @@ _Contributions by:_
- [Mihai](https://github.com/mh-dm)
- [Any Winter](https://github.com/any-winter-4079)
- [Doggettx](https://github.com/doggettx)
- [Matthias Wild](https://github.com/mauwii)
- [Kyle Schouviller](https://github.com/kyle0654)
_Original CompVis Authors:_
## __Original CompVis Authors:__
- [Robin Rombach](https://github.com/rromb)
- [Patrick von Platen](https://github.com/patrickvonplaten)

View File

@ -1,6 +1,12 @@
# Original README from CompVis/stable-diffusion
---
title: CompVis-Readme
---
_Stable Diffusion was made possible thanks to a collaboration with [Stability AI](https://stability.ai/) and [Runway](https://runwayml.com/) and builds upon our previous work:_
# _README from [CompVis/stable-diffusion](https://github.com/CompVis/stable-diffusion)_
_Stable Diffusion was made possible thanks to a collaboration with
[Stability AI](https://stability.ai/) and [Runway](https://runwayml.com/) and
builds upon our previous work:_
[**High-Resolution Image Synthesis with Latent Diffusion Models**](https://ommer-lab.com/research/latent-diffusion-models/)<br/>
[Robin Rombach](https://github.com/rromb)\*,
@ -9,32 +15,40 @@ _Stable Diffusion was made possible thanks to a collaboration with [Stability AI
[Patrick Esser](https://github.com/pesser),
[Björn Ommer](https://hci.iwr.uni-heidelberg.de/Staff/bommer)<br/>
**CVPR '22 Oral**
## **CVPR '22 Oral**
which is available on [GitHub](https://github.com/CompVis/latent-diffusion). PDF at [arXiv](https://arxiv.org/abs/2112.10752). Please also visit our [Project page](https://ommer-lab.com/research/latent-diffusion-models/).
which is available on [GitHub](https://github.com/CompVis/latent-diffusion). PDF
at [arXiv](https://arxiv.org/abs/2112.10752). Please also visit our
[Project page](https://ommer-lab.com/research/latent-diffusion-models/).
![txt2img-stable2](../assets/stable-samples/txt2img/merged-0006.png)
[Stable Diffusion](#stable-diffusion-v1) is a latent text-to-image diffusion
model.
Thanks to a generous compute donation from [Stability AI](https://stability.ai/) and support from [LAION](https://laion.ai/), we were able to train a Latent Diffusion Model on 512x512 images from a subset of the [LAION-5B](https://laion.ai/blog/laion-5b/) database.
Similar to Google's [Imagen](https://arxiv.org/abs/2205.11487),
this model uses a frozen CLIP ViT-L/14 text encoder to condition the model on text prompts.
With its 860M UNet and 123M text encoder, the model is relatively lightweight and runs on a GPU with at least 10GB VRAM.
See [this section](#stable-diffusion-v1) below and the [model card](https://huggingface.co/CompVis/stable-diffusion).
model. Thanks to a generous compute donation from
[Stability AI](https://stability.ai/) and support from
[LAION](https://laion.ai/), we were able to train a Latent Diffusion Model on
512x512 images from a subset of the [LAION-5B](https://laion.ai/blog/laion-5b/)
database. Similar to Google's [Imagen](https://arxiv.org/abs/2205.11487), this
model uses a frozen CLIP ViT-L/14 text encoder to condition the model on text
prompts. With its 860M UNet and 123M text encoder, the model is relatively
lightweight and runs on a GPU with at least 10GB VRAM. See
[this section](#stable-diffusion-v1) below and the
[model card](https://huggingface.co/CompVis/stable-diffusion).
## Requirements
A suitable [conda](https://conda.io/) environment named `ldm` can be created
and activated with:
A suitable [conda](https://conda.io/) environment named `ldm` can be created and
activated with:
```
```bash
conda env create -f environment.yaml
conda activate ldm
```
You can also update an existing [latent diffusion](https://github.com/CompVis/latent-diffusion) environment by running
You can also update an existing
[latent diffusion](https://github.com/CompVis/latent-diffusion) environment by
running
```
```bash
conda install pytorch torchvision -c pytorch
pip install transformers==4.19.2
pip install -e .
@ -42,42 +56,57 @@ pip install -e .
## Stable Diffusion v1
Stable Diffusion v1 refers to a specific configuration of the model
architecture that uses a downsampling-factor 8 autoencoder with an 860M UNet
and CLIP ViT-L/14 text encoder for the diffusion model. The model was pretrained on 256x256 images and
then finetuned on 512x512 images.
Stable Diffusion v1 refers to a specific configuration of the model architecture
that uses a downsampling-factor 8 autoencoder with an 860M UNet and CLIP
ViT-L/14 text encoder for the diffusion model. The model was pretrained on
256x256 images and then finetuned on 512x512 images.
\*Note: Stable Diffusion v1 is a general text-to-image diffusion model and therefore mirrors biases and (mis-)conceptions that are present
in its training data.
Details on the training procedure and data, as well as the intended use of the model can be found in the corresponding [model card](https://huggingface.co/CompVis/stable-diffusion).
Research into the safe deployment of general text-to-image models is an ongoing effort. To prevent misuse and harm, we currently provide access to the checkpoints only for [academic research purposes upon request](https://stability.ai/academia-access-form).
**This is an experiment in safe and community-driven publication of a capable and general text-to-image model. We are working on a public release with a more permissive license that also incorporates ethical considerations.\***
\*Note: Stable Diffusion v1 is a general text-to-image diffusion model and
therefore mirrors biases and (mis-)conceptions that are present in its training
data. Details on the training procedure and data, as well as the intended use of
the model can be found in the corresponding
[model card](https://huggingface.co/CompVis/stable-diffusion). Research into the
safe deployment of general text-to-image models is an ongoing effort. To prevent
misuse and harm, we currently provide access to the checkpoints only for
[academic research purposes upon request](https://stability.ai/academia-access-form).
**This is an experiment in safe and community-driven publication of a capable
and general text-to-image model. We are working on a public release with a more
permissive license that also incorporates ethical considerations.\***
[Request access to Stable Diffusion v1 checkpoints for academic research](https://stability.ai/academia-access-form)
### Weights
We currently provide three checkpoints, `sd-v1-1.ckpt`, `sd-v1-2.ckpt` and `sd-v1-3.ckpt`,
which were trained as follows,
We currently provide three checkpoints, `sd-v1-1.ckpt`, `sd-v1-2.ckpt` and
`sd-v1-3.ckpt`, which were trained as follows,
- `sd-v1-1.ckpt`: 237k steps at resolution `256x256` on [laion2B-en](https://huggingface.co/datasets/laion/laion2B-en).
194k steps at resolution `512x512` on [laion-high-resolution](https://huggingface.co/datasets/laion/laion-high-resolution) (170M examples from LAION-5B with resolution `>= 1024x1024`).
- `sd-v1-2.ckpt`: Resumed from `sd-v1-1.ckpt`.
515k steps at resolution `512x512` on "laion-improved-aesthetics" (a subset of laion2B-en,
filtered to images with an original size `>= 512x512`, estimated aesthetics score `> 5.0`, and an estimated watermark probability `< 0.5`. The watermark estimate is from the LAION-5B metadata, the aesthetics score is estimated using an [improved aesthetics estimator](https://github.com/christophschuhmann/improved-aesthetic-predictor)).
- `sd-v1-3.ckpt`: Resumed from `sd-v1-2.ckpt`. 195k steps at resolution `512x512` on "laion-improved-aesthetics" and 10\% dropping of the text-conditioning to improve [classifier-free guidance sampling](https://arxiv.org/abs/2207.12598).
- `sd-v1-1.ckpt`: 237k steps at resolution `256x256` on
[laion2B-en](https://huggingface.co/datasets/laion/laion2B-en). 194k steps at
resolution `512x512` on
[laion-high-resolution](https://huggingface.co/datasets/laion/laion-high-resolution)
(170M examples from LAION-5B with resolution `>= 1024x1024`).
- `sd-v1-2.ckpt`: Resumed from `sd-v1-1.ckpt`. 515k steps at resolution
`512x512` on "laion-improved-aesthetics" (a subset of laion2B-en, filtered to
images with an original size `>= 512x512`, estimated aesthetics score `> 5.0`,
and an estimated watermark probability `< 0.5`. The watermark estimate is from
the LAION-5B metadata, the aesthetics score is estimated using an
[improved aesthetics estimator](https://github.com/christophschuhmann/improved-aesthetic-predictor)).
- `sd-v1-3.ckpt`: Resumed from `sd-v1-2.ckpt`. 195k steps at resolution
`512x512` on "laion-improved-aesthetics" and 10\% dropping of the
text-conditioning to improve
[classifier-free guidance sampling](https://arxiv.org/abs/2207.12598).
Evaluations with different classifier-free guidance scales (1.5, 2.0, 3.0, 4.0,
5.0, 6.0, 7.0, 8.0) and 50 PLMS sampling
steps show the relative improvements of the checkpoints:
![sd evaluation results](../assets/v1-variants-scores.jpg)
5.0, 6.0, 7.0, 8.0) and 50 PLMS sampling steps show the relative improvements of
the checkpoints: ![sd evaluation results](../assets/v1-variants-scores.jpg)
### Text-to-Image with Stable Diffusion
![txt2img-stable2](../assets/stable-samples/txt2img/merged-0005.png)
![txt2img-stable2](../assets/stable-samples/txt2img/merged-0007.png)
Stable Diffusion is a latent diffusion model conditioned on the (non-pooled) text embeddings of a CLIP ViT-L/14 text encoder.
Stable Diffusion is a latent diffusion model conditioned on the (non-pooled)
text embeddings of a CLIP ViT-L/14 text encoder.
#### Sampling Script
@ -94,8 +123,11 @@ and sample with
python scripts/txt2img.py --prompt "a photograph of an astronaut riding a horse" --plms
```
By default, this uses a guidance scale of `--scale 7.5`, [Katherine Crowson's implementation](https://github.com/CompVis/latent-diffusion/pull/51) of the [PLMS](https://arxiv.org/abs/2202.09778) sampler,
and renders images of size 512x512 (which it was trained on) in 50 steps. All supported arguments are listed below (type `python scripts/txt2img.py --help`).
By default, this uses a guidance scale of `--scale 7.5`,
[Katherine Crowson's implementation](https://github.com/CompVis/latent-diffusion/pull/51)
of the [PLMS](https://arxiv.org/abs/2202.09778) sampler, and renders images of
size 512x512 (which it was trained on) in 50 steps. All supported arguments are
listed below (type `python scripts/txt2img.py --help`).
```commandline
usage: txt2img.py [-h] [--prompt [PROMPT]] [--outdir [OUTDIR]] [--skip_grid] [--skip_save] [--ddim_steps DDIM_STEPS] [--plms] [--laion400m] [--fixed_code] [--ddim_eta DDIM_ETA] [--n_iter N_ITER] [--H H] [--W W] [--C C] [--f F] [--n_samples N_SAMPLES] [--n_rows N_ROWS]
@ -133,14 +165,17 @@ optional arguments:
```
Note: The inference config for all v1 versions is designed to be used with EMA-only checkpoints.
For this reason `use_ema=False` is set in the configuration, otherwise the code will try to switch from
non-EMA to EMA weights. If you want to examine the effect of EMA vs no EMA, we provide "full" checkpoints
which contain both types of weights. For these, `use_ema=False` will load and use the non-EMA weights.
Note: The inference config for all v1 versions is designed to be used with
EMA-only checkpoints. For this reason `use_ema=False` is set in the
configuration, otherwise the code will try to switch from non-EMA to EMA
weights. If you want to examine the effect of EMA vs no EMA, we provide "full"
checkpoints which contain both types of weights. For these, `use_ema=False` will
load and use the non-EMA weights.
#### Diffusers Integration
Another way to download and sample Stable Diffusion is by using the [diffusers library](https://github.com/huggingface/diffusers/tree/main#new--stable-diffusion-is-now-fully-compatible-with-diffusers)
Another way to download and sample Stable Diffusion is by using the
[diffusers library](https://github.com/huggingface/diffusers/tree/main#new--stable-diffusion-is-now-fully-compatible-with-diffusers)
```py
# make sure you're logged in with `huggingface-cli login`
@ -161,18 +196,23 @@ image.save("astronaut_rides_horse.png")
### Image Modification with Stable Diffusion
By using a diffusion-denoising mechanism as first proposed by [SDEdit](https://arxiv.org/abs/2108.01073), the model can be used for different
tasks such as text-guided image-to-image translation and upscaling. Similar to the txt2img sampling script,
we provide a script to perform image modification with Stable Diffusion.
By using a diffusion-denoising mechanism as first proposed by
[SDEdit](https://arxiv.org/abs/2108.01073), the model can be used for different
tasks such as text-guided image-to-image translation and upscaling. Similar to
the txt2img sampling script, we provide a script to perform image modification
with Stable Diffusion.
The following describes an example where a rough sketch made in [Pinta](https://www.pinta-project.com/) is converted into a detailed artwork.
The following describes an example where a rough sketch made in
[Pinta](https://www.pinta-project.com/) is converted into a detailed artwork.
```
python scripts/img2img.py --prompt "A fantasy landscape, trending on artstation" --init-img <path-to-img.jpg> --strength 0.8
```
Here, strength is a value between 0.0 and 1.0, that controls the amount of noise that is added to the input image.
Values that approach 1.0 allow for lots of variations but will also produce images that are not semantically consistent with the input. See the following example.
Here, strength is a value between 0.0 and 1.0, that controls the amount of noise
that is added to the input image. Values that approach 1.0 allow for lots of
variations but will also produce images that are not semantically consistent
with the input. See the following example.
**Input**
@ -183,15 +223,19 @@ Values that approach 1.0 allow for lots of variations but will also produce imag
![out3](../assets/stable-samples/img2img/mountains-3.png)
![out2](../assets/stable-samples/img2img/mountains-2.png)
This procedure can, for example, also be used to upscale samples from the base model.
This procedure can, for example, also be used to upscale samples from the base
model.
## Comments
- Our codebase for the diffusion models builds heavily on [OpenAI's ADM codebase](https://github.com/openai/guided-diffusion)
and [https://github.com/lucidrains/denoising-diffusion-pytorch](https://github.com/lucidrains/denoising-diffusion-pytorch).
- Our codebase for the diffusion models builds heavily on
[OpenAI's ADM codebase](https://github.com/openai/guided-diffusion) and
[https://github.com/lucidrains/denoising-diffusion-pytorch](https://github.com/lucidrains/denoising-diffusion-pytorch).
Thanks for open-sourcing!
- The implementation of the transformer encoder is from [x-transformers](https://github.com/lucidrains/x-transformers) by [lucidrains](https://github.com/lucidrains?tab=repositories).
- The implementation of the transformer encoder is from
[x-transformers](https://github.com/lucidrains/x-transformers) by
[lucidrains](https://github.com/lucidrains?tab=repositories).
## BibTeX

Binary file not shown.

After

Width:  |  Height:  |  Size: 665 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 628 B

16
docs/swagger-ui/index.css Normal file
View File

@ -0,0 +1,16 @@
/* Page-level reset for the Swagger UI documentation page. */

/* Size the root element with border-box and always reserve a vertical scrollbar. */
html {
box-sizing: border-box;
/* NOTE(review): `-moz-scrollbars-vertical` looks like a legacy Mozilla-only value; `overflow-y: scroll` below is the standard declaration. */
overflow: -moz-scrollbars-vertical;
overflow-y: scroll;
}
/* Every element and its :before/:after pseudo-elements inherit the root's box-sizing. */
*,
*:before,
*:after {
box-sizing: inherit;
}
/* Remove the default body margin and apply a light grey page background. */
body {
margin: 0;
background: #fafafa;
}

View File

@ -0,0 +1,79 @@
<!doctype html>
<html lang="en-US">
<head>
<title>Swagger UI: OAuth2 Redirect</title>
</head>
<body>
<script>
'use strict';
// Completes an OAuth2 redirect for Swagger UI: parses the authorization
// server's response from this window's URL, passes it to the callbacks that
// the opener window stored on `window.opener.swaggerUIRedirectOauth2`, and
// then closes this window.
function run () {
// State object published by the window that opened this redirect page.
var oauth2 = window.opener.swaggerUIRedirectOauth2;
var sentState = oauth2.state;
var redirectUrl = oauth2.redirectUrl;
var isValid, qp, arr;
// Prefer the URL fragment when it mentions code/token/error; otherwise
// fall back to the regular query string.
if (/code|token|error/.test(window.location.hash)) {
qp = window.location.hash.substring(1).replace('?', '&');
} else {
qp = location.search.substring(1);
}
// Turn `a=b&c=d` into the JSON text `{"a":"b","c":"d"}` and parse it; the
// reviver URL-decodes every value (the root value, key "", is passed through).
// NOTE(review): only the first `=` of each pair is replaced, and a raw `"` or
// `\` inside a parameter would make JSON.parse throw - confirm inputs are
// always URL-encoded.
arr = qp.split("&");
arr.forEach(function (v,i,_arr) { _arr[i] = '"' + v.replace('=', '":"') + '"';});
qp = qp ? JSON.parse('{' + arr.join() + '}',
function (key, value) {
return key === "" ? value : decodeURIComponent(value);
}
) : {};
// The response is considered valid when it echoes the state that was sent.
isValid = qp.state === sentState;
// Authorization-code style flows that have not received a code yet.
if ((
oauth2.auth.schema.get("flow") === "accessCode" ||
oauth2.auth.schema.get("flow") === "authorizationCode" ||
oauth2.auth.schema.get("flow") === "authorization_code"
) && !oauth2.auth.code) {
// A state mismatch is reported as a warning only; processing continues.
if (!isValid) {
oauth2.errCb({
authId: oauth2.auth.name,
source: "auth",
level: "warning",
message: "Authorization may be unsafe, passed state was changed in server. The passed state wasn't returned from auth server."
});
}
if (qp.code) {
// Success: drop the one-time state, store the code and notify the opener.
delete oauth2.state;
oauth2.auth.code = qp.code;
oauth2.callback({auth: oauth2.auth, redirectUrl: redirectUrl});
} else {
// No code returned: build an error message from the error parameters
// when present, otherwise report a generic failure.
let oauthErrorMsg;
if (qp.error) {
oauthErrorMsg = "["+qp.error+"]: " +
(qp.error_description ? qp.error_description+ ". " : "no accessCode received from the server. ") +
(qp.error_uri ? "More info: "+qp.error_uri : "");
}
oauth2.errCb({
authId: oauth2.auth.name,
source: "auth",
level: "error",
message: oauthErrorMsg || "[Authorization failed]: no accessCode received from the server."
});
}
} else {
// Any other flow (or a code is already present): pass the parsed
// parameters through as the token together with the validity flag.
oauth2.callback({auth: oauth2.auth, token: qp, isValid: isValid, redirectUrl: redirectUrl});
}
// The redirect window has done its job in every branch.
window.close();
}
if (document.readyState !== 'loading') {
run();
} else {
document.addEventListener('DOMContentLoaded', function () {
run();
});
}
</script>
</body>
</html>

View File

@ -0,0 +1,20 @@
// Bootstraps Swagger UI once the page has finished loading and exposes the
// created instance as `window.ui`. It renders the `openapi3_0.yaml` spec into
// the `#swagger-ui` element using the standalone layout.
window.onload = function() {
//<editor-fold desc="Changeable Configuration Block">
// the following lines will be replaced by docker/configurator, when it runs in a docker-container
window.ui = SwaggerUIBundle({
url: "openapi3_0.yaml",
dom_id: '#swagger-ui',
deepLinking: true,
presets: [
SwaggerUIBundle.presets.apis,
SwaggerUIStandalonePreset
],
plugins: [
SwaggerUIBundle.plugins.DownloadUrl
],
layout: "StandaloneLayout"
});
//</editor-fold>
};

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -40,7 +40,13 @@ dependencies:
- tensorboard==2.9.0
- torchmetrics==0.9.3
- pip:
- flask==2.1.3
- flask_socketio==5.3.0
- flask_cors==3.0.10
- dependency_injector==4.40.0
- eventlet
- opencv-python==4.6.0
- protobuf==3.20.1
- realesrgan==0.2.5.0
- test-tube==0.7.5
- transformers==4.21.2

View File

@ -3,7 +3,7 @@ channels:
- pytorch
- defaults
dependencies:
- python=3.8.5
- python>=3.9
- pip=20.3
- cudatoolkit=11.3
- pytorch=1.11.0
@ -20,11 +20,16 @@ dependencies:
- realesrgan==0.2.5.0
- test-tube>=0.7.5
- streamlit==1.12.0
- pillow==9.2.0
- pillow==6.2.0
- einops==0.3.0
- torch-fidelity==0.3.0
- transformers==4.19.2
- torchmetrics==0.6.0
- flask==2.1.3
- flask_socketio==5.3.0
- flask_cors==3.0.10
- dependency_injector==4.40.0
- eventlet
- kornia==0.6.0
- -e git+https://github.com/openai/CLIP.git@main#egg=clip
- -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers

6
frontend/.eslintrc.cjs Normal file
View File

@ -0,0 +1,6 @@
// ESLint configuration for the frontend: ESLint's recommended rules plus the
// recommended TypeScript and React Hooks rule sets, parsed with the
// TypeScript parser.
module.exports = {
extends: ['eslint:recommended', 'plugin:@typescript-eslint/recommended', 'plugin:react-hooks/recommended'],
parser: '@typescript-eslint/parser',
plugins: ['@typescript-eslint', 'eslint-plugin-react-hooks'],
// Marks this as the root configuration so ESLint does not search parent directories.
root: true,
};

25
frontend/.gitignore vendored Normal file
View File

@ -0,0 +1,25 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*
node_modules
# We want to distribute the repo
# dist
dist-ssr
*.local
# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
.DS_Store
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?

85
frontend/README.md Normal file
View File

@ -0,0 +1,85 @@
# Stable Diffusion Web UI
Demo at https://peaceful-otter-7a427f.netlify.app/ (not connected to back end)
Much of this README is just notes for myself during dev work.
numpy rand: 0 to 4294967295
## Test and Build
from `frontend/`:
- `yarn dev` runs `tsc-watch`, which runs `vite build` on successful `tsc` transpilation
from `.`:
- `python backend/server.py` serves both frontend and backend at http://localhost:9090
## API
`backend/server.py` serves the UI and provides a [socket.io](https://github.com/socketio/socket.io) API via [flask-socketio](https://github.com/miguelgrinberg/flask-socketio).
### Server Listeners
The server listens for these socket.io events:
`cancel`
- Cancels in-progress image generation
- Returns ack only
`generateImage`
- Accepts object of image parameters
- Generates an image
- Returns ack only (image generation function sends progress and result via separate events)
`deleteImage`
- Accepts file path to image
- Deletes image
- Returns ack only
`deleteAllImages` WIP
- Deletes all images in `outputs/`
- Returns ack only
`requestAllImages`
- Returns array of all images in `outputs/`
`requestCapabilities` WIP
- Returns capabilities of server (torch device, GFPGAN and ESRGAN availability, ???)
`sendImage` WIP
- Accepts a File and attributes
- Saves image
- Used to save init images which are not generated images
### Server Emitters
`progress`
- Emitted during each step in generation
- Sends a number from 0 to 1 representing percentage of steps completed
`result` WIP
- Emitted when an image generation has completed
- Sends an object:
```
{
url: relative_file_path,
metadata: image_metadata_object
}
```
## TODO
- Search repo for "TODO"
- My one gripe with Chakra: no way to disable all animations right now and drop the dependence on `framer-motion`. I would prefer to save the ~30kb on bundle and have zero animations. This is on the Chakra roadmap. See https://github.com/chakra-ui/chakra-ui/pull/6368 for last discussion on this. Need to check in on this issue periodically.

View File

@ -0,0 +1 @@
.checkerboard{background-position:0px 0px,10px 10px;background-size:20px 20px;background-image:linear-gradient(45deg,#eee 25%,transparent 25%,transparent 75%,#eee 75%,#eee 100%),linear-gradient(45deg,#eee 25%,white 25%,white 75%,#eee 75%,#eee 100%)}

695
frontend/dist/assets/index.cc5cde43.js vendored Normal file

File diff suppressed because one or more lines are too long

14
frontend/dist/index.html vendored Normal file
View File

@ -0,0 +1,14 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Stable Diffusion Dream Server</title>
<script type="module" crossorigin src="/assets/index.cc5cde43.js"></script>
<link rel="stylesheet" href="/assets/index.447eb2a9.css">
</head>
<body>
<div id="root"></div>
</body>
</html>

1
frontend/index.d.ts vendored Normal file
View File

@ -0,0 +1 @@
// Shorthand ambient declaration: lets `redux-socket.io-middleware` be imported
// without type definitions (everything imported from it is typed as `any`).
// NOTE(review): the package is not listed in frontend/package.json - confirm
// this declaration is still needed.
declare module 'redux-socket.io-middleware';

12
frontend/index.html Normal file
View File

@ -0,0 +1,12 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Stable Diffusion Dream Server</title>
</head>
<body>
<div id="root"></div>
<script type="module" src="/src/main.tsx"></script>
</body>
</html>

46
frontend/package.json Normal file
View File

@ -0,0 +1,46 @@
{
"name": "sdui",
"private": true,
"version": "0.0.0",
"type": "module",
"scripts": {
"dev": "tsc-watch --onSuccess 'yarn run vite build -m development'",
"hmr": "vite dev",
"build": "tsc && vite build",
"build-dev": "tsc && vite build -m development",
"preview": "vite preview"
},
"dependencies": {
"@chakra-ui/react": "^2.3.1",
"@emotion/react": "^11.10.4",
"@emotion/styled": "^11.10.4",
"@reduxjs/toolkit": "^1.8.5",
"@types/uuid": "^8.3.4",
"dateformat": "^5.0.3",
"framer-motion": "^7.2.1",
"lodash": "^4.17.21",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"react-dropzone": "^14.2.2",
"react-icons": "^4.4.0",
"react-redux": "^8.0.2",
"redux-persist": "^6.0.0",
"socket.io-client": "^4.5.2",
"uuid": "^9.0.0"
},
"devDependencies": {
"@types/dateformat": "^5.0.0",
"@types/react": "^18.0.17",
"@types/react-dom": "^18.0.6",
"@typescript-eslint/eslint-plugin": "^5.36.2",
"@typescript-eslint/parser": "^5.36.2",
"@vitejs/plugin-react": "^2.0.1",
"eslint": "^8.23.0",
"eslint-plugin-prettier": "^4.2.1",
"eslint-plugin-react-hooks": "^4.6.0",
"tsc-watch": "^5.0.3",
"typescript": "^4.6.4",
"vite": "^3.0.7",
"vite-plugin-eslint": "^1.8.1"
}
}

60
frontend/src/App.tsx Normal file
View File

@ -0,0 +1,60 @@
import { Grid, GridItem } from '@chakra-ui/react';
import CurrentImage from './features/gallery/CurrentImage';
import LogViewer from './features/system/LogViewer';
import PromptInput from './features/sd/PromptInput';
import ProgressBar from './features/header/ProgressBar';
import { useEffect } from 'react';
import { useAppDispatch } from './app/hooks';
import { requestAllImages } from './app/socketio';
import ProcessButtons from './features/sd/ProcessButtons';
import ImageRoll from './features/gallery/ImageRoll';
import SiteHeader from './features/header/SiteHeader';
import OptionsAccordion from './features/sd/OptionsAccordion';
// Named grid areas for the main layout: header and progress bar span the full
// width; the options menu (left) and image roll (right) span the two lower rows.
const APP_TEMPLATE_AREAS = `
    "header header header header"
    "progressBar progressBar progressBar progressBar"
    "menu prompt processButtons imageRoll"
    "menu currentImage currentImage imageRoll"`;

/**
 * Root component of the web UI.
 *
 * Lays the feature components out on a full-viewport CSS grid and renders the
 * log viewer alongside it. On mount it asks the server for the images that
 * already exist so the gallery can be populated.
 */
function App() {
  const dispatch = useAppDispatch();

  // `dispatch` is the only dependency, so this runs once per mounted store.
  useEffect(() => {
    dispatch(requestAllImages());
  }, [dispatch]);

  return (
    <>
      <Grid
        width={'100vw'}
        height={'100vh'}
        templateAreas={APP_TEMPLATE_AREAS}
        gridTemplateRows={'36px 10px 100px auto'}
        gridTemplateColumns={'350px auto 100px 388px'}
        gap={2}
      >
        <GridItem area='header' pt={1}>
          <SiteHeader />
        </GridItem>

        <GridItem area='progressBar'>
          <ProgressBar />
        </GridItem>

        {/* Left column: generation options, scrolls independently */}
        <GridItem area='menu' pl='2' overflowY='scroll'>
          <OptionsAccordion />
        </GridItem>

        <GridItem area='prompt'>
          <PromptInput />
        </GridItem>

        <GridItem area='processButtons'>
          <ProcessButtons />
        </GridItem>

        <GridItem area='currentImage'>
          <CurrentImage />
        </GridItem>

        {/* Right column: gallery thumbnails, scrolls independently */}
        <GridItem area='imageRoll' pr='2' overflowY='scroll'>
          <ImageRoll />
        </GridItem>
      </Grid>
      <LogViewer />
    </>
  );
}

export default App;

22
frontend/src/Loading.tsx Normal file
View File

@ -0,0 +1,22 @@
import { Flex, Spinner } from '@chakra-ui/react';
/**
 * Full-viewport placeholder: a single spinner centred both horizontally and
 * vertically.
 */
const Loading = () => (
  <Flex
    width='100vw'
    height='100vh'
    alignItems={'center'}
    justifyContent={'center'}
  >
    <Spinner
      size={'xl'}
      thickness={'2px'}
      speed={'1s'}
      color={'gray.400'}
      emptyColor={'gray.200'}
    />
  </Flex>
);

export default Loading;

View File

@ -0,0 +1,55 @@
// TODO: use Enums?

/** Sampler names offered by the UI. */
export const SAMPLERS: string[] = [
  'ddim',
  'plms',
  'k_lms',
  'k_dpm_2',
  'k_dpm_2_a',
  'k_euler',
  'k_euler_a',
  'k_heun',
];

// Image dimensions are offered in multiples of 64, from 64 up to 1024.
const DIMENSION_STEP = 64;
const DIMENSION_CHOICES = 16;

/** Builds a fresh list of selectable dimensions: 64, 128, ..., 1024. */
const buildDimensionChoices = (): number[] =>
  Array.from(
    { length: DIMENSION_CHOICES },
    (_, index) => (index + 1) * DIMENSION_STEP
  );

/** Valid image widths. */
export const WIDTHS: number[] = buildDimensionChoices();

/** Valid image heights (a separate array with the same values as `WIDTHS`). */
export const HEIGHTS: number[] = buildDimensionChoices();

/** Valid upscaling levels as label/value pairs. */
export const UPSCALING_LEVELS: { key: string; value: number }[] = [2, 4].map(
  (factor) => ({ key: `${factor}x`, value: factor })
);

/** Maps internal parameter names to their human-readable labels. */
export const PARAMETERS: Record<string, string> = {
  prompt: 'Prompt',
  iterations: 'Iterations',
  steps: 'Steps',
  cfgScale: 'CFG Scale',
  height: 'Height',
  width: 'Width',
  sampler: 'Sampler',
  seed: 'Seed',
  img2imgStrength: 'img2img Strength',
  gfpganStrength: 'GFPGAN Strength',
  upscalingLevel: 'Upscaling Level',
  upscalingStrength: 'Upscaling Strength',
  initialImagePath: 'Initial Image',
  maskPath: 'Initial Image Mask',
  shouldFitToWidthHeight: 'Fit Initial Image',
  seamless: 'Seamless Tiling',
};

// Bounds used when picking a random seed: 0 to 2^32 - 1 inclusive.
export const NUMPY_RAND_MIN = 0;
export const NUMPY_RAND_MAX = 2 ** 32 - 1;

View File

@ -0,0 +1,7 @@
import { useDispatch, useSelector } from 'react-redux';
import type { TypedUseSelectorHook } from 'react-redux';
import type { RootState, AppDispatch } from './store';
// Use throughout your app instead of plain `useDispatch` and `useSelector`
// Both are the react-redux hooks re-exported with this store's types attached:
// `useAppDispatch` returns an `AppDispatch`, and `useAppSelector` receives
// `RootState` in its selector.
export const useAppDispatch: () => AppDispatch = useDispatch;
export const useAppSelector: TypedUseSelectorHook<RootState> = useSelector;

View File

@ -0,0 +1,182 @@
import { SDState } from '../features/sd/sdSlice';
import randomInt from '../features/sd/util/randomInt';
import {
seedWeightsToString,
stringToSeedWeights,
} from '../features/sd/util/seedWeightPairs';
import { SystemState } from '../features/system/systemSlice';
import { NUMPY_RAND_MAX, NUMPY_RAND_MIN } from './constants';
/*
These functions translate frontend state into parameters
suitable for consumption by the backend, and vice-versa.
*/
/**
 * Translates frontend state into the three parameter objects sent with a
 * generation request.
 *
 * @param sdState - generation settings (prompt, sampler, init image, ...).
 * @param systemState - system settings; only `shouldDisplayInProgress` is read.
 * @returns an object with
 *   - `generationParameters`: snake_case generation options,
 *   - `esrganParameters`: `{ level, strength }`, or `false` when upscaling is off,
 *   - `gfpganParameters`: `{ strength }`, or `false` when face fixing is off.
 */
export const frontendToBackendParameters = (
  sdState: SDState,
  systemState: SystemState
): { [key: string]: any } => {
  // A fresh random seed is drawn only when the user asked for one.
  const effectiveSeed = sdState.shouldRandomizeSeed
    ? randomInt(NUMPY_RAND_MIN, NUMPY_RAND_MAX)
    : sdState.seed;

  const generationParameters: { [k: string]: any } = {
    prompt: sdState.prompt,
    iterations: sdState.iterations,
    steps: sdState.steps,
    cfg_scale: sdState.cfgScale,
    height: sdState.height,
    width: sdState.width,
    sampler_name: sdState.sampler,
    seed: effectiveSeed,
    seamless: sdState.seamless,
    progress_images: systemState.shouldDisplayInProgress,
  };

  // img2img options are attached only when an initial image is in use; the
  // mask is optional on top of that.
  if (sdState.shouldUseInitImage) {
    generationParameters.init_img = sdState.initialImagePath;
    generationParameters.strength = sdState.img2imgStrength;
    generationParameters.fit = sdState.shouldFitToWidthHeight;
    if (sdState.maskPath) {
      generationParameters.init_mask = sdState.maskPath;
    }
  }

  // `variation_amount` is always sent; 0 means variations are disabled.
  generationParameters.variation_amount = sdState.shouldGenerateVariations
    ? sdState.variantAmount
    : 0;
  if (sdState.shouldGenerateVariations && sdState.seedWeights) {
    generationParameters.with_variations = stringToSeedWeights(
      sdState.seedWeights
    );
  }

  // Post-processing steps are expressed as `false` when disabled.
  const esrganParameters: false | { [k: string]: any } =
    sdState.shouldRunESRGAN
      ? {
          level: sdState.upscalingLevel,
          strength: sdState.upscalingStrength,
        }
      : false;

  const gfpganParameters: false | { [k: string]: any } =
    sdState.shouldRunGFPGAN ? { strength: sdState.gfpganStrength } : false;

  return { generationParameters, esrganParameters, gfpganParameters };
};
/**
 * Translates image metadata received from the backend (snake_case keys) into
 * the key names used by the frontend state.
 *
 * The result always contains `shouldDisplayInProgress` plus the reset values
 * `shouldGenerateVariations`, `shouldRunESRGAN`, `shouldRunGFPGAN` (all
 * `false`) and `initialImagePath` / `maskPath` (both `''`). Further keys are
 * added only when the matching backend values are present.
 *
 * @param parameters - metadata object as sent by the backend.
 * @returns an object keyed by frontend parameter names.
 */
export const backendToFrontendParameters = (parameters: {
  [key: string]: any;
}) => {
  const {
    prompt,
    iterations,
    steps,
    cfg_scale,
    height,
    width,
    sampler_name,
    seed,
    seamless,
    progress_images,
    variation_amount,
    with_variations,
    gfpgan_strength,
    upscale,
    init_img,
    init_mask,
    strength,
  } = parameters;

  const sd: { [key: string]: any } = {
    shouldDisplayInProgress: progress_images,
    // init
    // NOTE(review): `shouldUseInitImage` is not reset here although
    // `initialImagePath` is - confirm whether it should default to false.
    shouldGenerateVariations: false,
    shouldRunESRGAN: false,
    shouldRunGFPGAN: false,
    initialImagePath: '',
    maskPath: '',
  };

  if (variation_amount > 0) {
    sd.shouldGenerateVariations = true;
    sd.variantAmount = variation_amount;
    if (with_variations) {
      sd.seedWeights = seedWeightsToString(with_variations);
    }
  }

  if (gfpgan_strength > 0) {
    sd.shouldRunGFPGAN = true;
    sd.gfpganStrength = gfpgan_strength;
  }

  // `upscale` is read as a [level, strength] pair.
  if (upscale) {
    sd.shouldRunESRGAN = true;
    sd.upscalingLevel = upscale[0];
    sd.upscalingStrength = upscale[1];
  }

  if (init_img) {
    sd.shouldUseInitImage = true;
    sd.initialImagePath = init_img;
    // The frontend key is `img2imgStrength`; that is the name
    // `frontendToBackendParameters` reads when it sends `strength`, so the
    // value must be stored under it to round-trip.
    sd.img2imgStrength = strength;
    if (init_mask) {
      sd.maskPath = init_mask;
    }
  }

  // if we had a prompt, add all the metadata, but if we don't have a prompt,
  // we must have only done ESRGAN or GFPGAN so do not add that metadata
  if (prompt) {
    sd.prompt = prompt;
    sd.iterations = iterations;
    sd.steps = steps;
    sd.cfgScale = cfg_scale;
    sd.height = height;
    sd.width = width;
    sd.sampler = sampler_name;
    sd.seed = seed;
    sd.seamless = seamless;
  }

  return sd;
};

View File

@ -0,0 +1,393 @@
import { createAction, Middleware } from '@reduxjs/toolkit';
import { io } from 'socket.io-client';
import {
addImage,
clearIntermediateImage,
removeImage,
SDImage,
SDMetadata,
setGalleryImages,
setIntermediateImage,
} from '../features/gallery/gallerySlice';
import {
addLogEntry,
setCurrentStep,
setIsConnected,
setIsProcessing,
} from '../features/system/systemSlice';
import { v4 as uuidv4 } from 'uuid';
import { setInitialImagePath, setMaskPath } from '../features/sd/sdSlice';
import {
backendToFrontendParameters,
frontendToBackendParameters,
} from './parameterTranslation';
/**
 * Shape of the acknowledgement object passed to the callbacks of
 * `socketio.emit(...)` in this module.
 */
export interface SocketIOResponse {
// Handlers in this module act on the response only when this is 'OK'.
status: 'OK' | 'ERROR';
// Optional human-readable detail.
message?: string;
// Optional payload; its shape depends on the event (e.g. the uploaded file's
// path for the upload events, the image list for `requestAllImages`).
data?: any;
}
/**
 * Creates the Redux middleware that bridges the store and the socket.io
 * backend.
 *
 * - Opens a socket.io connection to port 9090 on the host serving the page.
 * - On the first action that passes through, registers the listeners that
 *   turn server events (`connect`, `disconnect`, `result`, `progress`,
 *   `intermediateResult`, `error`) into dispatched actions.
 * - Translates `socketio/*` actions into emitted socket.io events.
 *
 * Every action, handled or not, is forwarded to the next middleware.
 *
 * @returns the configured middleware.
 */
export const socketioMiddleware = () => {
  // NOTE(review): scheme (`http`) and port (9090) are hard-coded; only the
  // hostname is taken from the page URL.
  const { hostname } = new URL(window.location.href);

  const socketio = io(`http://${hostname}:9090`);

  // Listeners need `dispatch`/`getState`, which are only available once the
  // middleware is invoked, so they are attached lazily and exactly once.
  let areListenersSet = false;

  const middleware: Middleware = (store) => (next) => (action) => {
    const { dispatch, getState } = store;

    if (!areListenersSet) {
      // CONNECT
      socketio.on('connect', () => {
        try {
          dispatch(setIsConnected(true));
        } catch (e) {
          console.error(e);
        }
      });

      // DISCONNECT
      socketio.on('disconnect', () => {
        try {
          dispatch(setIsConnected(false));
          dispatch(setIsProcessing(false));
          dispatch(addLogEntry(`Disconnected from server`));
        } catch (e) {
          console.error(e);
        }
      });

      // PROCESSING RESULT
      socketio.on(
        'result',
        (data: {
          url: string;
          type: 'generation' | 'esrgan' | 'gfpgan';
          uuid?: string;
          metadata: { [key: string]: any };
        }) => {
          try {
            const newUuid = uuidv4();
            const { type, url, uuid, metadata } = data;
            switch (type) {
              case 'generation': {
                const translatedMetadata =
                  backendToFrontendParameters(metadata);
                dispatch(
                  addImage({
                    uuid: newUuid,
                    url,
                    metadata: translatedMetadata,
                  })
                );
                dispatch(addLogEntry(`Image generated: ${url}`));
                break;
              }
              case 'esrgan': {
                const originalImage = getState().gallery.images.find(
                  (i: SDImage) => i.uuid === uuid
                );
                // The source image may no longer be in the gallery (e.g. it
                // was deleted while processing); start from empty metadata
                // instead of throwing.
                const newMetadata = {
                  ...originalImage?.metadata,
                };
                newMetadata.shouldRunESRGAN = true;
                newMetadata.upscalingLevel = metadata.upscale[0];
                newMetadata.upscalingStrength = metadata.upscale[1];
                dispatch(
                  addImage({
                    uuid: newUuid,
                    url,
                    metadata: newMetadata,
                  })
                );
                dispatch(addLogEntry(`ESRGAN upscaled: ${url}`));
                break;
              }
              case 'gfpgan': {
                const originalImage = getState().gallery.images.find(
                  (i: SDImage) => i.uuid === uuid
                );
                // See the 'esrgan' case: tolerate a missing source image.
                const newMetadata = {
                  ...originalImage?.metadata,
                };
                newMetadata.shouldRunGFPGAN = true;
                newMetadata.gfpganStrength = metadata.gfpgan_strength;
                dispatch(
                  addImage({
                    uuid: newUuid,
                    url,
                    metadata: newMetadata,
                  })
                );
                dispatch(addLogEntry(`GFPGAN fixed faces: ${url}`));
                break;
              }
            }
          } catch (e) {
            console.error(e);
          } finally {
            // A result always ends the current job. Reset the flag even when
            // handling the result failed, so the UI cannot get stuck in the
            // "processing" state.
            dispatch(setIsProcessing(false));
          }
        }
      );

      // PROGRESS UPDATE
      socketio.on('progress', (data: { step: number }) => {
        try {
          dispatch(setIsProcessing(true));
          dispatch(setCurrentStep(data.step));
        } catch (e) {
          console.error(e);
        }
      });

      // INTERMEDIATE IMAGE
      socketio.on(
        'intermediateResult',
        (data: { url: string; metadata: SDMetadata }) => {
          try {
            const uuid = uuidv4();
            const { url, metadata } = data;
            dispatch(
              setIntermediateImage({
                uuid,
                url,
                metadata,
              })
            );
            dispatch(addLogEntry(`Intermediate image generated: ${url}`));
          } catch (e) {
            console.error(e);
          }
        }
      );

      // ERROR FROM BACKEND
      socketio.on('error', (message) => {
        try {
          dispatch(addLogEntry(`Server error: ${message}`));
          dispatch(setIsProcessing(false));
          dispatch(clearIntermediateImage());
        } catch (e) {
          console.error(e);
        }
      });

      areListenersSet = true;
    }

    // HANDLE ACTIONS
    switch (action.type) {
      // GENERATE IMAGE
      case 'socketio/generateImage': {
        dispatch(setIsProcessing(true));
        // -1 marks "requested, no step reported yet".
        dispatch(setCurrentStep(-1));

        const { generationParameters, esrganParameters, gfpganParameters } =
          frontendToBackendParameters(getState().sd, getState().system);

        socketio.emit(
          'generateImage',
          generationParameters,
          esrganParameters,
          gfpganParameters
        );

        dispatch(
          addLogEntry(
            `Image generation requested: ${JSON.stringify({
              ...generationParameters,
              ...esrganParameters,
              ...gfpganParameters,
            })}`
          )
        );
        break;
      }

      // RUN ESRGAN (UPSCALING)
      case 'socketio/runESRGAN': {
        const imageToProcess = action.payload;
        dispatch(setIsProcessing(true));
        dispatch(setCurrentStep(-1));
        const { upscalingLevel, upscalingStrength } = getState().sd;
        const esrganParameters = {
          upscale: [upscalingLevel, upscalingStrength],
        };
        socketio.emit('runESRGAN', imageToProcess, esrganParameters);
        dispatch(
          addLogEntry(
            `ESRGAN upscale requested: ${JSON.stringify({
              file: imageToProcess.url,
              ...esrganParameters,
            })}`
          )
        );
        break;
      }

      // RUN GFPGAN (FIX FACES)
      case 'socketio/runGFPGAN': {
        const imageToProcess = action.payload;
        dispatch(setIsProcessing(true));
        dispatch(setCurrentStep(-1));
        const { gfpganStrength } = getState().sd;
        const gfpganParameters = {
          gfpgan_strength: gfpganStrength,
        };
        socketio.emit('runGFPGAN', imageToProcess, gfpganParameters);
        dispatch(
          addLogEntry(
            `GFPGAN fix faces requested: ${JSON.stringify({
              file: imageToProcess.url,
              ...gfpganParameters,
            })}`
          )
        );
        break;
      }

      // DELETE IMAGE
      case 'socketio/deleteImage': {
        const imageToDelete = action.payload;
        const { url } = imageToDelete;
        // The image is removed from the gallery only once the server confirms.
        socketio.emit('deleteImage', url, (response: SocketIOResponse) => {
          if (response.status === 'OK') {
            dispatch(removeImage(imageToDelete));
            dispatch(addLogEntry(`Image deleted: ${url}`));
          }
        });
        break;
      }

      // GET ALL IMAGES FOR GALLERY
      case 'socketio/requestAllImages': {
        socketio.emit('requestAllImages', (response: SocketIOResponse) => {
          const images = response?.data;
          // `data` is optional on the response; do not replace the gallery
          // or read `.length` unless an actual list came back.
          if (!Array.isArray(images)) {
            dispatch(
              addLogEntry(
                `Unable to load images: ${
                  response?.message ?? 'unexpected response from server'
                }`
              )
            );
            return;
          }
          dispatch(setGalleryImages(images));
          dispatch(addLogEntry(`Loaded ${images.length} images`));
        });
        break;
      }

      // CANCEL PROCESSING
      case 'socketio/cancelProcessing': {
        socketio.emit('cancel', (response: SocketIOResponse) => {
          const { intermediateImage } = getState().gallery;
          if (response.status === 'OK') {
            dispatch(setIsProcessing(false));
            // Keep the last intermediate image as a regular gallery image.
            if (intermediateImage) {
              dispatch(addImage(intermediateImage));
              dispatch(
                addLogEntry(
                  `Intermediate image saved: ${intermediateImage.url}`
                )
              );
              dispatch(clearIntermediateImage());
            }
            dispatch(addLogEntry(`Processing canceled`));
          }
        });
        break;
      }

      // UPLOAD INITIAL IMAGE
      case 'socketio/uploadInitialImage': {
        const file = action.payload;

        socketio.emit(
          'uploadInitialImage',
          file,
          file.name,
          (response: SocketIOResponse) => {
            if (response.status === 'OK') {
              dispatch(setInitialImagePath(response.data));
              dispatch(
                addLogEntry(`Initial image uploaded: ${response.data}`)
              );
            }
          }
        );
        break;
      }

      // UPLOAD MASK IMAGE
      case 'socketio/uploadMaskImage': {
        const file = action.payload;

        socketio.emit(
          'uploadMaskImage',
          file,
          file.name,
          (response: SocketIOResponse) => {
            if (response.status === 'OK') {
              dispatch(setMaskPath(response.data));
              dispatch(addLogEntry(`Mask image uploaded: ${response.data}`));
            }
          }
        );
        break;
      }
    }

    // Always pass the action on, including the `socketio/*` ones.
    next(action);
  };

  return middleware;
};
// Actions to be used by app
// Each action type below matches a `case` in the middleware's action switch,
// which turns it into the corresponding socket.io emit.

// No payload: parameters are read from the `sd` and `system` state slices.
export const generateImage = createAction<undefined>('socketio/generateImage');
// Payload: the gallery image to post-process.
export const runESRGAN = createAction<SDImage>('socketio/runESRGAN');
export const runGFPGAN = createAction<SDImage>('socketio/runGFPGAN');
// Payload: the gallery image to delete; its `url` is sent to the server.
export const deleteImage = createAction<SDImage>('socketio/deleteImage');
export const requestAllImages = createAction<undefined>(
'socketio/requestAllImages'
);
export const cancelProcessing = createAction<undefined>(
'socketio/cancelProcessing'
);
// Payload: the file to upload; it is sent together with its `name`.
export const uploadInitialImage = createAction<File>(
'socketio/uploadInitialImage'
);
export const uploadMaskImage = createAction<File>('socketio/uploadMaskImage');

53
frontend/src/app/store.ts Normal file
View File

@ -0,0 +1,53 @@
import { combineReducers, configureStore } from '@reduxjs/toolkit';
import { persistReducer } from 'redux-persist';
import storage from 'redux-persist/lib/storage'; // defaults to localStorage for web
import sdReducer from '../features/sd/sdSlice';
import galleryReducer from '../features/gallery/gallerySlice';
import systemReducer from '../features/system/systemSlice';
import { socketioMiddleware } from './socketio';
// Root reducer: one slice each for generation settings, gallery and system status.
const reducers = combineReducers({
sd: sdReducer,
gallery: galleryReducer,
system: systemReducer,
});
// Persist the whole root state under the key 'root' using the imported storage.
// NOTE(review): no whitelist/blacklist is configured, so all three slices are
// persisted - confirm that transient flags in `system` are meant to survive a reload.
const persistConfig = {
key: 'root',
storage,
};
const persistedReducer = persistReducer(persistConfig, reducers);
/*
The frontend needs to be distributed as a production build, so
we cannot reasonably ask users to edit the JS and specify the
host and port on which the socket.io server will run.
The intended solution is to allow the server script to be run with
arguments (or just edited) providing the host and port, and to have
the server expose a route `/socketio_config` which responds with the
host and port. The frontend would request that route on load, and the
store setup seems like as good a place as any to make that request.
NOTE(review): no such request is made in this file, and
`socketioMiddleware` currently connects to a hard-coded port 9090 on
the page's hostname - confirm whether this plan is still current.
*/
// Continue with store setup
export const store = configureStore({
reducer: persistedReducer,
middleware: (getDefaultMiddleware) =>
getDefaultMiddleware({
// redux-persist sometimes needs to have a function in redux, need to disable this check
serializableCheck: false,
}).concat(socketioMiddleware()),
});
// Infer the `RootState` and `AppDispatch` types from the store itself
export type RootState = ReturnType<typeof store.getState>;
// `RootState` is derived from the persisted combination of the `sd`, `gallery` and `system` reducers.
export type AppDispatch = typeof store.dispatch;

37
frontend/src/app/theme.ts Normal file
View File

@ -0,0 +1,37 @@
import { extendTheme } from '@chakra-ui/react';
import type { StyleFunctionProps } from '@chakra-ui/styled-system';
/** Returns `dark` when the active color mode is dark, otherwise `light`. */
const pickForColorMode = <T>(
  props: StyleFunctionProps,
  dark: T,
  light: T
): T => (props.colorMode === 'dark' ? dark : light);

/**
 * Application theme: dark mode by default (system preference ignored), with
 * base-style overrides for Tooltip, Accordion and FormLabel.
 */
export const theme = extendTheme({
  config: { initialColorMode: 'dark', useSystemColorMode: false },
  components: {
    Tooltip: {
      baseStyle: (props: StyleFunctionProps) => ({
        textColor: pickForColorMode(props, 'gray.800', 'gray.100'),
      }),
    },
    Accordion: {
      baseStyle: (props: StyleFunctionProps) => {
        // Subtle hover tint that contrasts with the current mode's background.
        const hoverBackground = pickForColorMode(
          props,
          'rgba(255,255,255,0.05)',
          'rgba(0,0,0,0.05)'
        );
        return {
          button: {
            fontWeight: 'bold',
            _hover: { bgColor: hoverBackground },
          },
          panel: { paddingBottom: 2 },
        };
      },
    },
    FormLabel: {
      baseStyle: { fontWeight: 'light' },
    },
  },
});

View File

@ -0,0 +1,16 @@
import { Button, ButtonProps } from '@chakra-ui/react';
/** Props for SDButton: Chakra's ButtonProps plus the text to display. */
interface Props extends ButtonProps {
// Text rendered as the button's content.
label: string;
}
/**
 * Chakra Button that renders `label` as its content and defaults to the
 * small size; all remaining props are forwarded to the Button.
 */
const SDButton = ({ label, size = 'sm', ...buttonProps }: Props) => (
  <Button size={size} {...buttonProps}>
    {label}
  </Button>
);
export default SDButton;

View File

@ -0,0 +1,56 @@
import {
FormControl,
NumberInput,
NumberInputField,
NumberInputStepper,
NumberIncrementStepper,
NumberDecrementStepper,
Text,
FormLabel,
NumberInputProps,
Flex,
} from '@chakra-ui/react';
/** Props for SDNumberInput: Chakra's NumberInputProps plus a label and a width. */
interface Props extends NumberInputProps {
// Optional text shown next to the input; no label is rendered when it is falsy.
label?: string;
// Width applied to the wrapping FormControl.
width?: string | number;
}
/**
 * Labelled number input with increment/decrement steppers.
 *
 * `keepWithinRange={false}` and `clampValueOnBlur={true}` are applied after
 * the forwarded props, so callers cannot override them.
 */
const SDNumberInput = ({
  label,
  isDisabled = false,
  fontSize = 'md',
  size = 'sm',
  width,
  isInvalid,
  ...inputProps
}: Props) => {
  // The label is optional; nothing is rendered for it when it is falsy.
  const labelElement = label && (
    <FormLabel marginBottom={1}>
      <Text fontSize={fontSize} whiteSpace='nowrap'>
        {label}
      </Text>
    </FormLabel>
  );

  return (
    <FormControl isDisabled={isDisabled} width={width} isInvalid={isInvalid}>
      <Flex gap={2} justifyContent={'space-between'} alignItems={'center'}>
        {labelElement}
        <NumberInput
          size={size}
          {...inputProps}
          keepWithinRange={false}
          clampValueOnBlur={true}
        >
          <NumberInputField fontSize={'md'} />
          <NumberInputStepper>
            <NumberIncrementStepper />
            <NumberDecrementStepper />
          </NumberInputStepper>
        </NumberInput>
      </Flex>
    </FormControl>
  );
};
export default SDNumberInput;

View File

@ -0,0 +1,57 @@
import {
Flex,
FormControl,
FormLabel,
Select,
SelectProps,
Text,
} from '@chakra-ui/react';
/** Props for SDSelect: Chakra's SelectProps plus a label and the options to offer. */
interface Props extends SelectProps {
// Text shown in the label next to the dropdown.
label: string;
// Options to render: either plain values (used as both the option value and its text),
// or { key, value } pairs where `key` is the displayed text and `value` the option value.
validValues:
| Array<number | string>
| Array<{ key: string; value: string | number }>;
}
/**
 * Labelled dropdown. Each entry of `validValues` becomes one `<option>`:
 * primitives are used as both value and text, while `{ key, value }` pairs
 * show `key` as the text and use `value` as the option value.
 */
const SDSelect = ({
  label,
  isDisabled,
  validValues,
  size = 'sm',
  fontSize = 'md',
  marginBottom = 1,
  whiteSpace = 'nowrap',
  ...selectProps
}: Props) => (
  <FormControl isDisabled={isDisabled}>
    <Flex justifyContent={'space-between'} alignItems={'center'}>
      <FormLabel marginBottom={marginBottom}>
        <Text fontSize={fontSize} whiteSpace={whiteSpace}>
          {label}
        </Text>
      </FormLabel>
      <Select fontSize={fontSize} size={size} {...selectProps}>
        {validValues.map((opt) => {
          // Plain string/number option.
          if (typeof opt === 'string' || typeof opt === 'number') {
            return (
              <option key={opt} value={opt}>
                {opt}
              </option>
            );
          }
          // { key, value } option: `key` is the visible text.
          return (
            <option key={opt.value} value={opt.value}>
              {opt.key}
            </option>
          );
        })}
      </Select>
    </Flex>
  </FormControl>
);
export default SDSelect;

View File

@ -0,0 +1,42 @@
import {
Flex,
FormControl,
FormLabel,
Switch,
SwitchProps,
} from '@chakra-ui/react';
/** Props for SDSwitch: Chakra's SwitchProps plus a label and a width. */
interface Props extends SwitchProps {
// Optional text shown next to the switch; no label is rendered when it is falsy.
label?: string;
// Width applied to the wrapping FormControl.
width?: string | number;
}
/**
 * Labelled toggle switch. The optional label grows to fill the row and the
 * switch sits at the opposite end; remaining props are forwarded to Switch.
 */
const SDSwitch = ({
  label,
  isDisabled = false,
  fontSize = 'md',
  size = 'md',
  width,
  ...switchProps
}: Props) => {
  // The label is optional; nothing is rendered for it when it is falsy.
  const labelElement = label && (
    <FormLabel
      fontSize={fontSize}
      marginBottom={1}
      flexGrow={2}
      whiteSpace='nowrap'
    >
      {label}
    </FormLabel>
  );

  return (
    <FormControl isDisabled={isDisabled} width={width}>
      <Flex justifyContent={'space-between'} alignItems={'center'}>
        {labelElement}
        <Switch size={size} {...switchProps} />
      </Flex>
    </FormControl>
  );
};
export default SDSwitch;

View File

@ -0,0 +1,161 @@
import { Center, Flex, Image, useColorModeValue } from '@chakra-ui/react';
import { useAppDispatch, useAppSelector } from '../../app/hooks';
import { RootState } from '../../app/store';
import { setAllParameters, setInitialImagePath, setSeed } from '../sd/sdSlice';
import { useState } from 'react';
import ImageMetadataViewer from './ImageMetadataViewer';
import DeleteImageModalButton from './DeleteImageModalButton';
import SDButton from '../../components/SDButton';
import { runESRGAN, runGFPGAN } from '../../app/socketio';
import { createSelector } from '@reduxjs/toolkit';
import { SystemState } from '../system/systemSlice';
import { isEqual } from 'lodash';
// Height of the image display area: the viewport height minus a fixed 238px.
const height = 'calc(100vh - 238px)';
// Picks the connection/processing flags and post-processor availability from
// the system slice; deep equality on the result avoids needless re-renders.
const systemSelector = createSelector(
  (state: RootState) => state.system,
  ({
    isProcessing,
    isConnected,
    isGFPGANAvailable,
    isESRGANAvailable,
  }: SystemState) => ({
    isProcessing,
    isConnected,
    isGFPGANAvailable,
    isESRGANAvailable,
  }),
  { memoizeOptions: { resultEqualityCheck: isEqual } }
);
/**
 * Shows the image in focus (the intermediate image if there is one,
 * otherwise the current gallery image) together with its action buttons
 * and an optional metadata overlay.
 */
const CurrentImage = () => {
  const { currentImage, intermediateImage } = useAppSelector(
    (state: RootState) => state.gallery
  );
  const { isProcessing, isConnected, isGFPGANAvailable, isESRGANAvailable } =
    useAppSelector(systemSelector);
  const dispatch = useAppDispatch();
  const bgColor = useColorModeValue(
    'rgba(255, 255, 255, 0.85)',
    'rgba(0, 0, 0, 0.8)'
  );
  const [shouldShowImageDetails, setShouldShowImageDetails] =
    useState<boolean>(false);

  const imageToDisplay = intermediateImage || currentImage;

  // A seed of 0 is a valid seed, so test for presence rather than truthiness.
  const seed = imageToDisplay ? imageToDisplay.metadata.seed : undefined;
  const hasSeed = seed !== undefined && seed !== null;

  // Post-processing needs an idle, connected server and a finished image.
  const canPostprocess =
    isConnected && !isProcessing && !Boolean(intermediateImage);

  const handleClickUseSeed = () => {
    if (seed !== undefined && seed !== null) {
      dispatch(setSeed(seed));
    }
  };

  return (
    <Flex direction={'column'} rounded={'md'} borderWidth={1} p={2} gap={2}>
      {imageToDisplay && (
        <Flex gap={2}>
          <SDButton
            label='Use as initial image'
            colorScheme={'gray'}
            flexGrow={1}
            variant={'outline'}
            onClick={() => dispatch(setInitialImagePath(imageToDisplay.url))}
          />
          <SDButton
            label='Use all'
            colorScheme={'gray'}
            flexGrow={1}
            variant={'outline'}
            onClick={() => dispatch(setAllParameters(imageToDisplay.metadata))}
          />
          <SDButton
            label='Use seed'
            colorScheme={'gray'}
            flexGrow={1}
            variant={'outline'}
            isDisabled={!hasSeed}
            onClick={handleClickUseSeed}
          />
          <SDButton
            label='Upscale'
            colorScheme={'gray'}
            flexGrow={1}
            variant={'outline'}
            isDisabled={!(isESRGANAvailable && canPostprocess)}
            onClick={() => dispatch(runESRGAN(imageToDisplay))}
          />
          <SDButton
            label='Fix faces'
            colorScheme={'gray'}
            flexGrow={1}
            variant={'outline'}
            isDisabled={!(isGFPGANAvailable && canPostprocess)}
            onClick={() => dispatch(runGFPGAN(imageToDisplay))}
          />
          <SDButton
            label='Details'
            colorScheme={'gray'}
            variant={shouldShowImageDetails ? 'solid' : 'outline'}
            borderWidth={1}
            flexGrow={1}
            onClick={() => setShouldShowImageDetails(!shouldShowImageDetails)}
          />
          <DeleteImageModalButton image={imageToDisplay}>
            <SDButton
              label='Delete'
              colorScheme={'red'}
              flexGrow={1}
              variant={'outline'}
              isDisabled={Boolean(intermediateImage)}
            />
          </DeleteImageModalButton>
        </Flex>
      )}
      <Center height={height} position={'relative'}>
        {imageToDisplay && (
          <Image
            src={imageToDisplay.url}
            fit='contain'
            maxWidth={'100%'}
            maxHeight={'100%'}
          />
        )}
        {imageToDisplay && shouldShowImageDetails && (
          // Overlay covering the image area with the metadata viewer.
          <Flex
            width={'100%'}
            height={'100%'}
            position={'absolute'}
            top={0}
            left={0}
            p={3}
            boxSizing='border-box'
            backgroundColor={bgColor}
            overflow='scroll'
          >
            <ImageMetadataViewer image={imageToDisplay} />
          </Flex>
        )}
      </Center>
    </Flex>
  );
};
export default CurrentImage;

View File

@ -0,0 +1,94 @@
import {
IconButtonProps,
Modal,
ModalBody,
ModalCloseButton,
ModalContent,
ModalFooter,
ModalHeader,
ModalOverlay,
Text,
useDisclosure,
} from '@chakra-ui/react';
import { createSelector } from '@reduxjs/toolkit';
import {
cloneElement,
ReactElement,
SyntheticEvent,
} from 'react';
import { useAppDispatch, useAppSelector } from '../../app/hooks';
import { deleteImage } from '../../app/socketio';
import { RootState } from '../../app/store';
import SDButton from '../../components/SDButton';
import { setShouldConfirmOnDelete, SystemState } from '../system/systemSlice';
import { SDImage } from './gallerySlice';
/**
 * Props declared for the delete-button wrapper. The component itself accepts
 * them with 'aria-label' omitted.
 */
interface Props extends IconButtonProps {
// Image that is deleted once the user confirms (or immediately when confirmation is off).
image: SDImage;
'aria-label': string;
// Single element used as the trigger; it is cloned with an onClick handler attached.
children: ReactElement;
}
// Selects whether deleting an image should first ask for confirmation.
const systemSelector = createSelector(
  (state: RootState) => state.system,
  ({ shouldConfirmOnDelete }: SystemState) => shouldConfirmOnDelete
);
/*
TODO: The modal and button to open it should be two different components,
but their state is closely related and I'm not sure how best to accomplish it.
*/
/**
 * Wraps a single trigger element. Clicking it either opens a confirmation
 * modal or, when confirmation is switched off, deletes the image at once.
 * The modal also offers to disable future confirmations.
 */
const DeleteImageModalButton = ({
  image,
  children,
}: Omit<Props, 'aria-label'>) => {
  const { isOpen, onOpen, onClose } = useDisclosure();
  const dispatch = useAppDispatch();
  const shouldConfirmOnDelete = useAppSelector(systemSelector);

  // Deletes the image and closes the modal.
  const handleDelete = () => {
    dispatch(deleteImage(image));
    onClose();
  };

  // Deletes the image and turns the confirmation prompt off for the future.
  const handleDeleteAndDontAsk = () => {
    dispatch(deleteImage(image));
    dispatch(setShouldConfirmOnDelete(false));
    onClose();
  };

  // Click on the wrapped trigger: keep the event from reaching parents.
  const handleClickDelete = (e: SyntheticEvent) => {
    e.stopPropagation();
    if (shouldConfirmOnDelete) {
      onOpen();
    } else {
      handleDelete();
    }
  };

  const trigger = cloneElement(children, { onClick: handleClickDelete });

  return (
    <>
      {trigger}
      <Modal isOpen={isOpen} onClose={onClose}>
        <ModalOverlay />
        <ModalContent>
          <ModalHeader>Are you sure you want to delete this image?</ModalHeader>
          <ModalCloseButton />
          <ModalBody>
            <Text>It will be deleted forever!</Text>
          </ModalBody>
          <ModalFooter justifyContent={'space-between'}>
            <SDButton label={'Yes'} colorScheme='red' onClick={handleDelete} />
            <SDButton
              label={"Yes, and don't ask me again"}
              colorScheme='red'
              onClick={handleDeleteAndDontAsk}
            />
            <SDButton label='Cancel' colorScheme='blue' onClick={onClose} />
          </ModalFooter>
        </ModalContent>
      </Modal>
    </>
  );
};
export default DeleteImageModalButton;

View File

@ -0,0 +1,124 @@
import {
Center,
Flex,
IconButton,
Link,
List,
ListItem,
Text,
} from '@chakra-ui/react';
import { FaPlus } from 'react-icons/fa';
import { PARAMETERS } from '../../app/constants';
import { useAppDispatch } from '../../app/hooks';
import SDButton from '../../components/SDButton';
import { setAllParameters, setParameter } from '../sd/sdSlice';
import { SDImage, SDMetadata } from './gallerySlice';
/** Props for ImageMetadataViewer. */
type Props = {
// Image whose url and metadata are displayed.
image: SDImage;
};
/**
 * Lists an image's metadata, one row per key of PARAMETERS that is defined
 * on the image. Each row has a button that applies just that parameter, and
 * a button at the top applies all of them. The raw metadata JSON is shown
 * last. When no parameter is defined, a placeholder message is shown.
 */
const ImageMetadataViewer = ({ image }: Props) => {
  const dispatch = useAppDispatch();

  // Collect the parameters that are present on this image, in PARAMETERS order.
  const metadata: Array<{
    label: string;
    key: string;
    value: string | number | boolean;
  }> = [];
  for (const key of Object.keys(PARAMETERS)) {
    const value = image.metadata[key as keyof SDMetadata];
    if (value === undefined) {
      continue;
    }
    metadata.push({ label: PARAMETERS[key], key, value });
  }

  // Values rendered as an italic "None" instead of their string form.
  const isBlank = (value: string | number | boolean) =>
    value === undefined || value === null || value === '' || value === 0;

  const hasMetadata = metadata.length !== 0;

  return (
    <Flex gap={2} direction={'column'} overflowY={'scroll'} width={'100%'}>
      <SDButton
        label='Use all parameters'
        colorScheme={'gray'}
        padding={2}
        isDisabled={!hasMetadata}
        onClick={() => dispatch(setAllParameters(image.metadata))}
      />
      <Flex gap={2}>
        <Text fontWeight={'semibold'}>File:</Text>
        <Link href={image.url} isExternal>
          <Text>{image.url}</Text>
        </Link>
      </Flex>
      {hasMetadata ? (
        <>
          <List>
            {metadata.map(({ label, key, value }, i) => (
              <ListItem key={i} pb={1}>
                <Flex gap={2}>
                  <IconButton
                    aria-label='Use this parameter'
                    icon={<FaPlus />}
                    size={'xs'}
                    onClick={() => dispatch(setParameter({ key, value }))}
                  />
                  <Text fontWeight={'semibold'}>{label}:</Text>
                  {isBlank(value) ? (
                    <Text maxHeight={100} fontStyle={'italic'}>
                      None
                    </Text>
                  ) : (
                    <Text maxHeight={100} overflowY={'scroll'}>
                      {value.toString()}
                    </Text>
                  )}
                </Flex>
              </ListItem>
            ))}
          </List>
          <Flex gap={2}>
            <Text fontWeight={'semibold'}>Raw:</Text>
            <Text
              maxHeight={100}
              overflowY={'scroll'}
              wordBreak={'break-all'}
            >
              {JSON.stringify(image.metadata)}
            </Text>
          </Flex>
        </>
      ) : (
        <Center width={'100%'} pt={10}>
          <Text fontSize={'lg'} fontWeight='semibold'>
            No metadata available
          </Text>
        </Center>
      )}
    </Flex>
  );
};
export default ImageMetadataViewer;

View File

@ -0,0 +1,150 @@
import {
Box,
Flex,
Icon,
IconButton,
Image,
useColorModeValue,
} from '@chakra-ui/react';
import { RootState } from '../../app/store';
import { useAppDispatch, useAppSelector } from '../../app/hooks';
import { SDImage, setCurrentImage } from './gallerySlice';
import { FaCheck, FaCopy, FaSeedling, FaTrash } from 'react-icons/fa';
import DeleteImageModalButton from './DeleteImageModalButton';
import { memo, SyntheticEvent, useState } from 'react';
import { setAllParameters, setSeed } from '../sd/sdSlice';
/** Props for a single gallery thumbnail. */
interface HoverableImageProps {
// Image shown by the thumbnail.
image: SDImage;
// Whether this thumbnail is the currently selected image.
isSelected: boolean;
}
/**
 * Gallery thumbnail. Clicking selects the image; hovering reveals buttons to
 * delete it, reuse all of its parameters, or reuse its seed.
 *
 * Memoized on the image uuid and the selection flag only, so other prop
 * changes do not trigger a re-render.
 */
const HoverableImage = memo(
  (props: HoverableImageProps) => {
    const [isHovered, setIsHovered] = useState<boolean>(false);
    const dispatch = useAppDispatch();

    const checkColor = useColorModeValue('green.600', 'green.300');
    const bgColor = useColorModeValue('gray.200', 'gray.700');
    const bgGradient = useColorModeValue(
      'radial-gradient(circle, rgba(255,255,255,0.7) 0%, rgba(255,255,255,0.7) 20%, rgba(0,0,0,0) 100%)',
      'radial-gradient(circle, rgba(0,0,0,0.7) 0%, rgba(0,0,0,0.7) 20%, rgba(0,0,0,0) 100%)'
    );

    const { image, isSelected } = props;
    const { url, metadata } = image;

    // A seed of 0 is valid. Testing truthiness would hide the button and,
    // worse, make `{seed && ...}` render a literal "0" into the thumbnail.
    const { seed } = metadata;
    const hasSeed = seed !== undefined && seed !== null;

    const handleMouseOver = () => setIsHovered(true);
    const handleMouseOut = () => setIsHovered(false);

    const handleClickSetAllParameters = (e: SyntheticEvent) => {
      e.stopPropagation();
      dispatch(setAllParameters(metadata));
    };

    const handleClickSetSeed = (e: SyntheticEvent) => {
      e.stopPropagation();
      if (seed !== undefined && seed !== null) {
        dispatch(setSeed(seed));
      }
    };

    return (
      <Box position={'relative'}>
        <Image
          width={120}
          height={120}
          objectFit='cover'
          rounded={'md'}
          src={url}
          loading={'lazy'}
          backgroundColor={bgColor}
        />
        <Flex
          cursor={'pointer'}
          position={'absolute'}
          top={0}
          left={0}
          rounded={'md'}
          width='100%'
          height='100%'
          alignItems={'center'}
          justifyContent={'center'}
          background={isSelected ? bgGradient : undefined}
          onClick={() => dispatch(setCurrentImage(image))}
          onMouseOver={handleMouseOver}
          onMouseOut={handleMouseOut}
        >
          {isSelected && (
            <Icon fill={checkColor} width={'50%'} height={'50%'} as={FaCheck} />
          )}
          {isHovered && (
            <Flex
              direction={'column'}
              gap={1}
              position={'absolute'}
              top={1}
              right={1}
            >
              <DeleteImageModalButton image={image}>
                <IconButton
                  colorScheme='red'
                  aria-label='Delete image'
                  icon={<FaTrash />}
                  size='xs'
                  fontSize={15}
                />
              </DeleteImageModalButton>
              <IconButton
                aria-label='Use all parameters'
                colorScheme={'blue'}
                icon={<FaCopy />}
                size='xs'
                fontSize={15}
                onClickCapture={handleClickSetAllParameters}
              />
              {hasSeed && (
                <IconButton
                  aria-label='Use seed'
                  colorScheme={'blue'}
                  icon={<FaSeedling />}
                  size='xs'
                  fontSize={16}
                  onClickCapture={handleClickSetSeed}
                />
              )}
            </Flex>
          )}
        </Flex>
      </Box>
    );
  },
  (prev, next) =>
    prev.image.uuid === next.image.uuid &&
    prev.isSelected === next.isSelected
);
/**
 * Renders the gallery as a wrapping row of thumbnails in reverse of their
 * stored order, marking the one whose uuid matches the current selection.
 */
const ImageRoll = () => {
  const { images, currentImageUuid } = useAppSelector(
    (state: RootState) => state.gallery
  );

  // Copy before reversing so the array held in the store is never mutated.
  const reversedImages = images.slice().reverse();

  return (
    <Flex gap={2} wrap='wrap' pb={2}>
      {reversedImages.map((image) => (
        <HoverableImage
          key={image.uuid}
          image={image}
          isSelected={currentImageUuid === image.uuid}
        />
      ))}
    </Flex>
  );
};
export default ImageRoll;

View File

@ -0,0 +1,144 @@
import { createSlice } from '@reduxjs/toolkit';
import type { PayloadAction } from '@reduxjs/toolkit';
import { v4 as uuidv4 } from 'uuid';
import { UpscalingLevel } from '../sd/sdSlice';
import { backendToFrontendParameters } from '../../app/parameterTranslation';
// TODO: Revise pending metadata RFC: https://github.com/lstein/stable-diffusion/issues/266
/**
 * Generation parameters attached to an image. Every field is optional, so
 * an image may carry any subset of them (including none).
 */
export interface SDMetadata {
prompt?: string;
steps?: number;
cfgScale?: number;
height?: number;
width?: number;
sampler?: string;
seed?: number;
img2imgStrength?: number;
gfpganStrength?: number;
upscalingLevel?: UpscalingLevel;
upscalingStrength?: number;
initialImagePath?: string;
maskPath?: string;
seamless?: boolean;
shouldFitToWidthHeight?: boolean;
}
/** An image known to the gallery: a frontend-side id, its url and its metadata. */
export interface SDImage {
// TODO: I have installed @types/uuid but cannot figure out how to use them here.
uuid: string;
url: string;
metadata: SDMetadata;
}
/** Shape of the gallery slice. */
export interface GalleryState {
// uuid of the selected image; the empty string when nothing is selected.
currentImageUuid: string;
// All gallery images, in the order they were added.
images: Array<SDImage>;
// Set via setIntermediateImage; cleared by clearIntermediateImage and addImage.
intermediateImage?: SDImage;
// The selected image itself; undefined when nothing is selected.
currentImage?: SDImage;
}
// Empty gallery: no images and no selection.
const initialState: GalleryState = {
currentImageUuid: '',
images: [],
};
/**
 * Gallery slice: the list of images, which one is selected, and the
 * intermediate image.
 */
export const gallerySlice = createSlice({
  name: 'gallery',
  initialState,
  reducers: {
    /** Selects the given image. */
    setCurrentImage: (state, action: PayloadAction<SDImage>) => {
      state.currentImage = action.payload;
      state.currentImageUuid = action.payload.uuid;
    },
    /**
     * Removes the image with the payload's uuid and moves the selection to
     * the image now at the removed position (or the last image if the removed
     * one was last). Does nothing when no image has that uuid.
     */
    removeImage: (state, action: PayloadAction<SDImage>) => {
      const { uuid } = action.payload;
      const removedIndex = state.images.findIndex(
        (image) => image.uuid === uuid
      );
      if (removedIndex === -1) {
        // Unknown image: nothing is removed, so the selection must not move.
        return;
      }

      const remainingImages = state.images.filter(
        (image) => image.uuid !== uuid
      );
      const nextIndex = Math.min(removedIndex, remainingImages.length - 1);
      const nextImage = remainingImages.length
        ? remainingImages[nextIndex]
        : undefined;

      state.images = remainingImages;
      state.currentImage = nextImage;
      state.currentImageUuid = nextImage ? nextImage.uuid : '';
    },
    /** Appends an image, selects it and clears the intermediate image. */
    addImage: (state, action: PayloadAction<SDImage>) => {
      state.images.push(action.payload);
      state.currentImageUuid = action.payload.uuid;
      state.intermediateImage = undefined;
      state.currentImage = action.payload;
    },
    /** Stores the given image as the intermediate image. */
    setIntermediateImage: (state, action: PayloadAction<SDImage>) => {
      state.intermediateImage = action.payload;
    },
    /** Clears the intermediate image. */
    clearIntermediateImage: (state) => {
      state.intermediateImage = undefined;
    },
    /**
     * Merges a list of `{ path, metadata }` entries into the gallery. An
     * empty list clears the gallery. Otherwise entries whose path is not yet
     * present are appended with freshly generated uuids.
     */
    setGalleryImages: (
      state,
      action: PayloadAction<
        Array<{
          path: string;
          metadata: { [key: string]: string | number | boolean };
        }>
      >
    ) => {
      // TODO: Revise pending metadata RFC: https://github.com/lstein/stable-diffusion/issues/266
      const images = action.payload;

      if (images.length === 0) {
        // there are no images on disk, clear the gallery
        state.images = [];
        state.currentImageUuid = '';
        state.currentImage = undefined;
        return;
      }

      // Skip urls that are already in the rehydrated state. A Set makes the
      // lookup linear instead of scanning the gallery once per entry.
      const knownUrls = new Set(state.images.map((image) => image.url));
      const preparedImages = images
        .filter((image) => !knownUrls.has(image.path))
        .map(
          (image): SDImage => ({
            uuid: uuidv4(),
            url: image.path,
            metadata: backendToFrontendParameters(image.metadata),
          })
        );

      const newImages = [...state.images].concat(preparedImages);

      // if previous currentimage no longer exists, set a new one
      if (!newImages.some((image) => image.uuid === state.currentImageUuid)) {
        const newCurrentImage = newImages[newImages.length - 1];
        state.currentImage = newCurrentImage;
        state.currentImageUuid = newCurrentImage.uuid;
      }

      state.images = newImages;
    },
  },
});
// Action creators taken from the slice, one per reducer defined in it.
export const {
setCurrentImage,
removeImage,
addImage,
setGalleryImages,
setIntermediateImage,
clearIntermediateImage,
} = gallerySlice.actions;
// The slice reducer is the module's default export.
export default gallerySlice.reducer;

View File

@ -0,0 +1,35 @@
import { Progress } from '@chakra-ui/react';
import { createSelector } from '@reduxjs/toolkit';
import { isEqual } from 'lodash';
import { useAppSelector } from '../../app/hooks';
import { RootState } from '../../app/store';
import { SDState } from '../sd/sdSlice';
// Picks `realSteps` from the sd slice; deep equality on the result avoids
// needless re-renders.
const sdSelector = createSelector(
  (state: RootState) => state.sd,
  ({ realSteps }: SDState) => ({ realSteps }),
  { memoizeOptions: { resultEqualityCheck: isEqual } }
);
/**
 * Progress bar showing `currentStep` as a percentage of `realSteps`. It is
 * indeterminate when the percentage is negative or when the two are equal.
 */
const ProgressBar = () => {
  const { realSteps } = useAppSelector(sdSelector);
  // Subscribe to the one field needed, so unrelated system-state changes do
  // not re-render the bar.
  const currentStep = useAppSelector(
    (state: RootState) => state.system.currentStep
  );

  // A `realSteps` of 0 (or a missing value) would produce NaN or Infinity;
  // fall back to 0 rather than passing that to Progress.
  const rawProgress = Math.round((currentStep * 100) / realSteps);
  const progress = Number.isFinite(rawProgress) ? rawProgress : 0;

  return (
    <Progress
      height='10px'
      value={progress}
      isIndeterminate={progress < 0 || currentStep === realSteps}
    />
  );
};
export default ProgressBar;

View File

@ -0,0 +1,93 @@
import {
Flex,
Heading,
IconButton,
Link,
Spacer,
Text,
useColorMode,
} from '@chakra-ui/react';
import { createSelector } from '@reduxjs/toolkit';
import { isEqual } from 'lodash';
import { FaSun, FaMoon, FaGithub } from 'react-icons/fa';
import { MdHelp, MdSettings } from 'react-icons/md';
import { useAppSelector } from '../../app/hooks';
import { RootState } from '../../app/store';
import SettingsModal from '../system/SettingsModal';
import { SystemState } from '../system/systemSlice';
// Picks the connection flag from the system slice; deep equality on the
// result avoids needless re-renders.
const systemSelector = createSelector(
  (state: RootState) => state.system,
  ({ isConnected }: SystemState) => ({ isConnected }),
  { memoizeOptions: { resultEqualityCheck: isEqual } }
);
/**
 * Top bar: title, connection status, settings button, links to the GitHub
 * issues page and repository, and the color-mode toggle.
 */
const SiteHeader = () => {
  const { colorMode, toggleColorMode } = useColorMode();
  const { isConnected } = useAppSelector(systemSelector);

  // Strict comparison; the toggle shows the icon of the mode it switches to.
  const isLightMode = colorMode === 'light';

  return (
    <Flex minWidth='max-content' alignItems='center' gap='1' pl={2} pr={1}>
      <Heading size={'lg'}>Stable Diffusion Dream Server</Heading>

      <Spacer />

      <Text textColor={isConnected ? 'green.500' : 'red.500'}>
        {isConnected ? 'Connected to server' : 'No connection to server'}
      </Text>

      <SettingsModal>
        <IconButton
          aria-label='Settings'
          variant='link'
          fontSize={24}
          size={'sm'}
          icon={<MdSettings />}
        />
      </SettingsModal>

      <IconButton
        aria-label='Link to Github Issues'
        variant='link'
        fontSize={23}
        size={'sm'}
        icon={
          <Link
            isExternal
            href='https://github.com/lstein/stable-diffusion/issues'
          >
            <MdHelp />
          </Link>
        }
      />

      <IconButton
        aria-label='Link to Github Repo'
        variant='link'
        fontSize={20}
        size={'sm'}
        icon={
          <Link isExternal href='https://github.com/lstein/stable-diffusion'>
            <FaGithub />
          </Link>
        }
      />

      <IconButton
        aria-label='Toggle Dark Mode'
        onClick={toggleColorMode}
        variant='link'
        size={'sm'}
        fontSize={isLightMode ? 18 : 20}
        icon={isLightMode ? <FaMoon /> : <FaSun />}
      />
    </Flex>
  );
};
export default SiteHeader;

View File

@ -0,0 +1,84 @@
import { Flex } from '@chakra-ui/react';
import { RootState } from '../../app/store';
import { useAppDispatch, useAppSelector } from '../../app/hooks';
import {
setUpscalingLevel,
setUpscalingStrength,
UpscalingLevel,
SDState,
} from '../sd/sdSlice';
import SDNumberInput from '../../components/SDNumberInput';
import SDSelect from '../../components/SDSelect';
import { UPSCALING_LEVELS } from '../../app/constants';
import { createSelector } from '@reduxjs/toolkit';
import { isEqual } from 'lodash';
import { SystemState } from '../system/systemSlice';
// Picks the upscaling settings from the sd slice; deep equality on the
// result avoids needless re-renders.
const sdSelector = createSelector(
  (state: RootState) => state.sd,
  ({ upscalingLevel, upscalingStrength }: SDState) => ({
    upscalingLevel,
    upscalingStrength,
  }),
  { memoizeOptions: { resultEqualityCheck: isEqual } }
);
// Picks ESRGAN availability from the system slice.
const systemSelector = createSelector(
  (state: RootState) => state.system,
  ({ isESRGANAvailable }: SystemState) => ({ isESRGANAvailable }),
  { memoizeOptions: { resultEqualityCheck: isEqual } }
);
/**
 * Upscaling controls: a scale dropdown and a strength input (0 to 1 in steps
 * of 0.05). Both are disabled while ESRGAN is unavailable.
 */
const ESRGANOptions = () => {
  const { upscalingLevel, upscalingStrength } = useAppSelector(sdSelector);
  const { isESRGANAvailable } = useAppSelector(systemSelector);
  const dispatch = useAppDispatch();

  const isUnavailable = !isESRGANAvailable;

  // The select yields a string; convert it back to a numeric level.
  const changeLevel = (rawLevel: string) =>
    dispatch(setUpscalingLevel(Number(rawLevel) as UpscalingLevel));

  return (
    <Flex direction={'column'} gap={2}>
      <SDSelect
        isDisabled={isUnavailable}
        label='Scale'
        value={upscalingLevel}
        onChange={(e) => changeLevel(e.target.value)}
        validValues={UPSCALING_LEVELS}
      />
      <SDNumberInput
        isDisabled={isUnavailable}
        label='Strength'
        step={0.05}
        min={0}
        max={1}
        onChange={(v) => dispatch(setUpscalingStrength(Number(v)))}
        value={upscalingStrength}
      />
    </Flex>
  );
};
export default ESRGANOptions;

View File

@ -0,0 +1,63 @@
import { Flex } from '@chakra-ui/react';
import { RootState } from '../../app/store';
import { useAppDispatch, useAppSelector } from '../../app/hooks';
import { SDState, setGfpganStrength } from '../sd/sdSlice';
import SDNumberInput from '../../components/SDNumberInput';
import { createSelector } from '@reduxjs/toolkit';
import { isEqual } from 'lodash';
import { SystemState } from '../system/systemSlice';
// Picks the GFPGAN strength from the sd slice.
const sdSelector = createSelector(
  (state: RootState) => state.sd,
  ({ gfpganStrength }: SDState) => ({ gfpganStrength }),
  { memoizeOptions: { resultEqualityCheck: isEqual } }
);
// Picks GFPGAN availability from the system slice.
const systemSelector = createSelector(
  (state: RootState) => state.system,
  ({ isGFPGANAvailable }: SystemState) => ({ isGFPGANAvailable }),
  { memoizeOptions: { resultEqualityCheck: isEqual } }
);
/**
 * Face-fixing control: a strength input (0 to 1 in steps of 0.05), disabled
 * while GFPGAN is unavailable.
 */
const GFPGANOptions = () => {
  const { gfpganStrength } = useAppSelector(sdSelector);
  const { isGFPGANAvailable } = useAppSelector(systemSelector);
  const dispatch = useAppDispatch();

  // The number input reports its value as a string.
  const changeStrength = (rawStrength: string) =>
    dispatch(setGfpganStrength(Number(rawStrength)));

  return (
    <Flex direction={'column'} gap={2}>
      <SDNumberInput
        isDisabled={!isGFPGANAvailable}
        label='Strength'
        step={0.05}
        min={0}
        max={1}
        onChange={changeStrength}
        value={gfpganStrength}
      />
    </Flex>
  );
};
export default GFPGANOptions;

View File

@ -0,0 +1,54 @@
import { Flex } from '@chakra-ui/react';
import { createSelector } from '@reduxjs/toolkit';
import { useAppDispatch, useAppSelector } from '../../app/hooks';
import { RootState } from '../../app/store';
import SDNumberInput from '../../components/SDNumberInput';
import SDSwitch from '../../components/SDSwitch';
import InitImage from './InitImage';
import {
SDState,
setImg2imgStrength,
setShouldFitToWidthHeight,
} from './sdSlice';
// Picks the image-to-image settings from the sd slice.
const sdSelector = createSelector(
  (state: RootState) => state.sd,
  ({ initialImagePath, img2imgStrength, shouldFitToWidthHeight }: SDState) => ({
    initialImagePath,
    img2imgStrength,
    shouldFitToWidthHeight,
  })
);
/**
 * Image-to-image controls: strength (0 to 1 in steps of 0.01), a "fit to
 * output size" switch, and the initial-image uploader. The strength and the
 * switch are disabled until an initial image path is set.
 */
const ImageToImageOptions = () => {
  const { initialImagePath, img2imgStrength, shouldFitToWidthHeight } =
    useAppSelector(sdSelector);
  const dispatch = useAppDispatch();

  const hasNoInitialImage = !initialImagePath;

  // The number input reports its value as a string.
  const changeStrength = (rawStrength: string) =>
    dispatch(setImg2imgStrength(Number(rawStrength)));

  return (
    <Flex direction={'column'} gap={2}>
      <SDNumberInput
        isDisabled={hasNoInitialImage}
        label='Strength'
        step={0.01}
        min={0}
        max={1}
        onChange={changeStrength}
        value={img2imgStrength}
      />
      <SDSwitch
        isDisabled={hasNoInitialImage}
        label='Fit initial image to output size'
        isChecked={shouldFitToWidthHeight}
        onChange={(e) => dispatch(setShouldFitToWidthHeight(e.target.checked))}
      />
      <InitImage />
    </Flex>
  );
};
export default ImageToImageOptions;

View File

@ -0,0 +1,20 @@
/*
 * Background for elements with the `checkerboard` class: two stacked 45deg
 * linear gradients tiled at 20px, the second layer offset by 10px in both
 * directions. The first layer uses #eee and transparent stops, the second
 * #eee and white stops.
 */
.checkerboard {
background-position: 0px 0px, 10px 10px;
background-size: 20px 20px;
background-image: linear-gradient(
45deg,
#eee 25%,
transparent 25%,
transparent 75%,
#eee 75%,
#eee 100%
),
linear-gradient(
45deg,
#eee 25%,
white 25%,
white 75%,
#eee 75%,
#eee 100%
);
}

View File

@ -0,0 +1,155 @@
import {
Button,
Flex,
IconButton,
Image,
useToast,
} from '@chakra-ui/react';
import { SyntheticEvent, useCallback, useState } from 'react';
import { FileRejection, useDropzone } from 'react-dropzone';
import { FaTrash } from 'react-icons/fa';
import { useAppDispatch, useAppSelector } from '../../app/hooks';
import { RootState } from '../../app/store';
import {
SDState,
setInitialImagePath,
setMaskPath,
} from '../../features/sd/sdSlice';
import MaskUploader from './MaskUploader';
import './InitImage.css';
import { uploadInitialImage } from '../../app/socketio';
import { createSelector } from '@reduxjs/toolkit';
import { isEqual } from 'lodash';
// Picks the initial image and mask paths from the sd slice.
const sdSelector = createSelector(
  (state: RootState) => state.sd,
  ({ initialImagePath, maskPath }: SDState) => ({
    initialImagePath,
    maskPath,
  }),
  { memoizeOptions: { resultEqualityCheck: isEqual } }
);
/**
 * Initial-image picker for image-to-image: a drop zone with "Upload Image"
 * and "Upload Mask" buttons and a reset button, followed by a preview of the
 * initial image with the mask optionally overlaid.
 */
const InitImage = () => {
  const toast = useToast();
  const dispatch = useAppDispatch();
  const { initialImagePath, maskPath } = useAppSelector(sdSelector);

  // Show one error toast per rejected file, and upload every accepted file.
  const onDrop = useCallback(
    (acceptedFiles: Array<File>, fileRejections: Array<FileRejection>) => {
      fileRejections.forEach((rejection: FileRejection) => {
        const msg = rejection.errors.reduce(
          (acc: string, cur: { message: string }) => acc + '\n' + cur.message,
          ''
        );

        toast({
          title: 'Upload failed',
          description: msg,
          status: 'error',
          isClosable: true,
        });
      });

      acceptedFiles.forEach((file: File) => {
        dispatch(uploadInitialImage(file));
      });
    },
    [dispatch, toast]
  );

  const { getRootProps, getInputProps, open } = useDropzone({
    onDrop,
    // Each MIME type lists only its own extensions, so PNG files are matched
    // by type as well as by file name.
    accept: {
      'image/jpeg': ['.jpg', '.jpeg'],
      'image/png': ['.png'],
    },
  });

  const [shouldShowMask, setShouldShowMask] = useState<boolean>(false);

  const handleClickUploadIcon = (e: SyntheticEvent) => {
    e.stopPropagation();
    open();
  };

  const handleClickResetInitialImageAndMask = (e: SyntheticEvent) => {
    e.stopPropagation();
    dispatch(setInitialImagePath(''));
    dispatch(setMaskPath(''));
  };

  // Hovering the image-upload button hides the mask overlay; leaving it, or
  // hovering or leaving the mask-upload button, shows the overlay.
  const handleMouseOverInitialImageUploadButton = () =>
    setShouldShowMask(false);
  const handleMouseOutInitialImageUploadButton = () => setShouldShowMask(true);

  const handleMouseOverMaskUploadButton = () => setShouldShowMask(true);
  const handleMouseOutMaskUploadButton = () => setShouldShowMask(true);

  return (
    <Flex
      {...getRootProps({
        // Once an image is set, clicks on the root are stopped from propagating.
        onClick: initialImagePath ? (e) => e.stopPropagation() : undefined,
      })}
      direction={'column'}
      alignItems={'center'}
      gap={2}
    >
      <input {...getInputProps({ multiple: false })} />
      <Flex gap={2} justifyContent={'space-between'} width={'100%'}>
        <Button
          size={'sm'}
          fontSize={'md'}
          fontWeight={'normal'}
          onClick={handleClickUploadIcon}
          onMouseOver={handleMouseOverInitialImageUploadButton}
          onMouseOut={handleMouseOutInitialImageUploadButton}
        >
          Upload Image
        </Button>

        <MaskUploader>
          <Button
            size={'sm'}
            fontSize={'md'}
            fontWeight={'normal'}
            onClick={handleClickUploadIcon}
            onMouseOver={handleMouseOverMaskUploadButton}
            onMouseOut={handleMouseOutMaskUploadButton}
          >
            Upload Mask
          </Button>
        </MaskUploader>
        <IconButton
          size={'sm'}
          aria-label={'Reset initial image and mask'}
          onClick={handleClickResetInitialImageAndMask}
          icon={<FaTrash />}
        />
      </Flex>
      {initialImagePath && (
        <Flex position={'relative'} width={'100%'}>
          <Image
            fit={'contain'}
            src={initialImagePath}
            rounded={'md'}
            className={'checkerboard'}
          />
          {shouldShowMask && maskPath && (
            <Image
              position={'absolute'}
              top={0}
              left={0}
              fit={'contain'}
              src={maskPath}
              rounded={'md'}
              zIndex={1}
              className={'checkerboard'}
            />
          )}
        </Flex>
      )}
    </Flex>
  );
};
export default InitImage;

View File

@ -0,0 +1,61 @@
import { useToast } from '@chakra-ui/react';
import { cloneElement, ReactElement, SyntheticEvent, useCallback } from 'react';
import { FileRejection, useDropzone } from 'react-dropzone';
import { useAppDispatch } from '../../app/hooks';
import { uploadMaskImage } from '../../app/socketio';
/** Props for MaskUploader. */
type Props = {
// Single element used as the upload trigger; it is cloned with an onClick handler attached.
children: ReactElement;
};
/**
 * Wraps a single trigger element in a drop zone for mask images. Clicking
 * the trigger opens the file dialog; accepted files are uploaded as the mask
 * and each rejected file produces an error toast.
 */
const MaskUploader = ({ children }: Props) => {
  const dispatch = useAppDispatch();
  const toast = useToast();

  const onDrop = useCallback(
    (acceptedFiles: Array<File>, fileRejections: Array<FileRejection>) => {
      fileRejections.forEach((rejection: FileRejection) => {
        const msg = rejection.errors.reduce(
          (acc: string, cur: { message: string }) => acc + '\n' + cur.message,
          ''
        );

        toast({
          title: 'Upload failed',
          description: msg,
          status: 'error',
          isClosable: true,
        });
      });

      acceptedFiles.forEach((file: File) => {
        dispatch(uploadMaskImage(file));
      });
    },
    [dispatch, toast]
  );

  const { getRootProps, getInputProps, open } = useDropzone({
    onDrop,
    // Each MIME type lists only its own extensions, so PNG files are matched
    // by type as well as by file name.
    accept: {
      'image/jpeg': ['.jpg', '.jpeg'],
      'image/png': ['.png'],
    },
  });

  const handleClickUploadIcon = (e: SyntheticEvent) => {
    e.stopPropagation();
    open();
  };

  return (
    <div {...getRootProps()}>
      <input {...getInputProps({ multiple: false })} />
      {cloneElement(children, {
        onClick: handleClickUploadIcon,
      })}
    </div>
  );
};
export default MaskUploader;

View File

@ -0,0 +1,211 @@
import {
Flex,
Box,
Text,
Accordion,
AccordionItem,
AccordionButton,
AccordionIcon,
AccordionPanel,
Switch,
} from '@chakra-ui/react';
import { RootState } from '../../app/store';
import { useAppDispatch, useAppSelector } from '../../app/hooks';
import {
setShouldRunGFPGAN,
setShouldRunESRGAN,
SDState,
setShouldUseInitImage,
} from '../sd/sdSlice';
import { createSelector } from '@reduxjs/toolkit';
import { isEqual } from 'lodash';
import { setOpenAccordions, SystemState } from '../system/systemSlice';
import SeedVariationOptions from './SeedVariationOptions';
import SamplerOptions from './SamplerOptions';
import ESRGANOptions from './ESRGANOptions';
import GFPGANOptions from './GFPGANOptions';
import OutputOptions from './OutputOptions';
import ImageToImageOptions from './ImageToImageOptions';
// Picks the init-image path and the feature toggles from the sd slice.
const sdSelector = createSelector(
  (state: RootState) => state.sd,
  ({
    initialImagePath,
    shouldUseInitImage,
    shouldRunESRGAN,
    shouldRunGFPGAN,
  }: SDState) => ({
    initialImagePath,
    shouldUseInitImage,
    shouldRunESRGAN,
    shouldRunGFPGAN,
  }),
  { memoizeOptions: { resultEqualityCheck: isEqual } }
);
/**
 * Selects the `system` slice fields the options accordion renders from:
 * GFPGAN/ESRGAN availability and the currently open accordion panels.
 */
const systemSelector = createSelector(
  (state: RootState) => state.system,
  ({ isGFPGANAvailable, isESRGANAvailable, openAccordions }: SystemState) => ({
    isGFPGANAvailable,
    isESRGANAvailable,
    openAccordions,
  }),
  { memoizeOptions: { resultEqualityCheck: isEqual } }
);
/**
 * Accordion that groups every generation option panel.
 *
 * The open panels are seeded from `system.openAccordions` and written back
 * through `setOpenAccordions` whenever the user expands or collapses one.
 * Three of the headers carry a switch that toggles the matching feature flag
 * in the `sd` slice.
 */
const OptionsAccordion = () => {
  const sd = useAppSelector(sdSelector);
  const system = useAppSelector(systemSelector);
  const dispatch = useAppDispatch();

  // The image-to-image switch is only usable once an initial image path exists.
  const hasInitialImage = Boolean(sd.initialImagePath);

  return (
    <Accordion
      defaultIndex={system.openAccordions}
      allowMultiple
      reduceMotion
      onChange={(expandedIndex) =>
        dispatch(setOpenAccordions(expandedIndex))
      }
    >
      {/* Seed & variation */}
      <AccordionItem>
        <h2>
          <AccordionButton>
            <Box flex='1' textAlign='left'>
              Seed & Variation
            </Box>
            <AccordionIcon />
          </AccordionButton>
        </h2>
        <AccordionPanel>
          <SeedVariationOptions />
        </AccordionPanel>
      </AccordionItem>

      {/* Sampler */}
      <AccordionItem>
        <h2>
          <AccordionButton>
            <Box flex='1' textAlign='left'>
              Sampler
            </Box>
            <AccordionIcon />
          </AccordionButton>
        </h2>
        <AccordionPanel>
          <SamplerOptions />
        </AccordionPanel>
      </AccordionItem>

      {/* Upscaling: switch disabled while ESRGAN is unavailable */}
      <AccordionItem>
        <h2>
          <AccordionButton>
            <Flex
              justifyContent={'space-between'}
              alignItems={'center'}
              width={'100%'}
              mr={2}
            >
              <Text>Upscale (ESRGAN)</Text>
              <Switch
                isDisabled={!system.isESRGANAvailable}
                isChecked={sd.shouldRunESRGAN}
                onChange={({ target }) =>
                  dispatch(setShouldRunESRGAN(target.checked))
                }
              />
            </Flex>
            <AccordionIcon />
          </AccordionButton>
        </h2>
        <AccordionPanel>
          <ESRGANOptions />
        </AccordionPanel>
      </AccordionItem>

      {/* Face fixing: switch disabled while GFPGAN is unavailable */}
      <AccordionItem>
        <h2>
          <AccordionButton>
            <Flex
              justifyContent={'space-between'}
              alignItems={'center'}
              width={'100%'}
              mr={2}
            >
              <Text>Fix Faces (GFPGAN)</Text>
              <Switch
                isDisabled={!system.isGFPGANAvailable}
                isChecked={sd.shouldRunGFPGAN}
                onChange={({ target }) =>
                  dispatch(setShouldRunGFPGAN(target.checked))
                }
              />
            </Flex>
            <AccordionIcon />
          </AccordionButton>
        </h2>
        <AccordionPanel>
          <GFPGANOptions />
        </AccordionPanel>
      </AccordionItem>

      {/* Image to image */}
      <AccordionItem>
        <h2>
          <AccordionButton>
            <Flex
              justifyContent={'space-between'}
              alignItems={'center'}
              width={'100%'}
              mr={2}
            >
              <Text>Image to Image</Text>
              <Switch
                isDisabled={!hasInitialImage}
                isChecked={sd.shouldUseInitImage}
                onChange={({ target }) =>
                  dispatch(setShouldUseInitImage(target.checked))
                }
              />
            </Flex>
            <AccordionIcon />
          </AccordionButton>
        </h2>
        <AccordionPanel>
          <ImageToImageOptions />
        </AccordionPanel>
      </AccordionItem>

      {/* Output */}
      <AccordionItem>
        <h2>
          <AccordionButton>
            <Box flex='1' textAlign='left'>
              Output
            </Box>
            <AccordionIcon />
          </AccordionButton>
        </h2>
        <AccordionPanel>
          <OutputOptions />
        </AccordionPanel>
      </AccordionItem>
    </Accordion>
  );
};

export default OptionsAccordion;

View File

@ -0,0 +1,66 @@
import { Flex } from '@chakra-ui/react';
import { RootState } from '../../app/store';
import { useAppDispatch, useAppSelector } from '../../app/hooks';
import { setHeight, setWidth, setSeamless, SDState } from '../sd/sdSlice';
import SDSelect from '../../components/SDSelect';
import { HEIGHTS, WIDTHS } from '../../app/constants';
import SDSwitch from '../../components/SDSwitch';
import { createSelector } from '@reduxjs/toolkit';
import { isEqual } from 'lodash';
/**
 * Selects the output-related fields of the `sd` slice (image size and
 * seamless tiling). The memoized result is compared with lodash `isEqual`.
 */
const sdSelector = createSelector(
  (state: RootState) => state.sd,
  ({ height, width, seamless }: SDState) => ({ height, width, seamless }),
  { memoizeOptions: { resultEqualityCheck: isEqual } }
);
/**
 * Output panel: width and height selects (values from WIDTHS / HEIGHTS)
 * plus the seamless-tiling switch. Each control dispatches straight to the
 * `sd` slice.
 */
const OutputOptions = () => {
  const output = useAppSelector(sdSelector);
  const dispatch = useAppDispatch();

  return (
    <Flex gap={2} direction={'column'}>
      <Flex gap={2}>
        <SDSelect
          label='Width'
          value={output.width}
          flexGrow={1}
          // The select reports a string; the slice stores a number.
          onChange={({ target }) => dispatch(setWidth(Number(target.value)))}
          validValues={WIDTHS}
        />
        <SDSelect
          label='Height'
          value={output.height}
          flexGrow={1}
          onChange={({ target }) => dispatch(setHeight(Number(target.value)))}
          validValues={HEIGHTS}
        />
      </Flex>
      <SDSwitch
        label='Seamless tiling'
        fontSize={'md'}
        isChecked={output.seamless}
        onChange={({ target }) => dispatch(setSeamless(target.checked))}
      />
    </Flex>
  );
};

export default OutputOptions;

View File

@ -0,0 +1,58 @@
import { Flex } from '@chakra-ui/react';
import { createSelector } from '@reduxjs/toolkit';
import { isEqual } from 'lodash';
import { useAppDispatch, useAppSelector } from '../../app/hooks';
import { cancelProcessing, generateImage } from '../../app/socketio';
import { RootState } from '../../app/store';
import SDButton from '../../components/SDButton';
import { SystemState } from '../system/systemSlice';
import useCheckParameters from '../system/useCheckParameters';
/**
 * Selects the processing and connection flags that gate the Cancel button.
 * The memoized result is compared with lodash `isEqual`.
 */
const systemSelector = createSelector(
  (state: RootState) => state.system,
  ({ isProcessing, isConnected }: SystemState) => ({
    isProcessing,
    isConnected,
  }),
  { memoizeOptions: { resultEqualityCheck: isEqual } }
);
/**
 * Generate / Cancel button pair.
 *
 * - Generate is enabled only when `useCheckParameters` reports that the
 *   current parameters are ready.
 * - Cancel is enabled only while connected and processing.
 */
const ProcessButtons = () => {
  const { isProcessing, isConnected } = useAppSelector(systemSelector);
  const dispatch = useAppDispatch();
  const isReady = useCheckParameters();

  return (
    // `space-between` is a distribution keyword: it is valid for
    // `justify-content` but not for `align-items`, where the declaration is
    // discarded. Set it on the main axis of this column container instead.
    <Flex
      gap={2}
      direction={'column'}
      justifyContent={'space-between'}
      height={'100%'}
    >
      <SDButton
        label='Generate'
        type='submit'
        colorScheme='green'
        flexGrow={1}
        isDisabled={!isReady}
        fontSize={'md'}
        size={'md'}
        onClick={() => dispatch(generateImage())}
      />
      <SDButton
        label='Cancel'
        colorScheme='red'
        flexGrow={1}
        fontSize={'md'}
        size={'md'}
        isDisabled={!isConnected || !isProcessing}
        onClick={() => dispatch(cancelProcessing())}
      />
    </Flex>
  );
};

export default ProcessButtons;

View File

@ -0,0 +1,25 @@
import { Textarea } from '@chakra-ui/react';
import { useAppDispatch, useAppSelector } from '../../app/hooks';
import { RootState } from '../../app/store';
import { setPrompt } from '../sd/sdSlice';
/**
 * Prompt text area bound to `sd.prompt`.
 * It is flagged invalid while the prompt is empty.
 */
const PromptInput = () => {
  // Select only the prompt string. Selecting the whole `sd` object would
  // re-render this textarea whenever any other sd field changes.
  const prompt = useAppSelector((state: RootState) => state.sd.prompt);
  const dispatch = useAppDispatch();

  return (
    <Textarea
      id='prompt'
      name='prompt'
      resize='none'
      size={'lg'}
      height={'100%'}
      isInvalid={!prompt.length}
      onChange={(e) => dispatch(setPrompt(e.target.value))}
      value={prompt}
      placeholder="I'm dreaming of..."
    />
  );
};

export default PromptInput;

View File

@ -0,0 +1,51 @@
import {
Slider,
SliderTrack,
SliderFilledTrack,
SliderThumb,
FormControl,
FormLabel,
Text,
Flex,
SliderProps,
} from '@chakra-ui/react';
/**
 * Props for SDSlider: every Chakra `SliderProps` plus a required label and
 * value, and an optional font size for the label text.
 */
interface Props extends SliderProps {
// Text shown beside the slider; also used as its aria-label.
label: string;
// Current slider value (the slider is rendered as a controlled input).
value: number;
// Font size of the label text; SDSlider falls back to 'sm' when omitted.
fontSize?: number | string;
}
/**
 * Labelled, controlled Chakra slider.
 *
 * `label`, `value`, `fontSize` and `onChange` are handled explicitly; all
 * remaining props are forwarded to the underlying `Slider`.
 */
const SDSlider = (props: Props) => {
  const { label, value, fontSize = 'sm', onChange, ...sliderProps } = props;

  return (
    <FormControl>
      <Flex gap={2}>
        <FormLabel marginInlineEnd={0} marginBottom={1}>
          <Text fontSize={fontSize} whiteSpace='nowrap'>
            {label}
          </Text>
        </FormLabel>
        <Slider
          aria-label={label}
          focusThumbOnChange
          value={value}
          onChange={onChange}
          {...sliderProps}
        >
          <SliderTrack>
            <SliderFilledTrack />
          </SliderTrack>
          <SliderThumb />
        </Slider>
      </Flex>
    </FormControl>
  );
};

export default SDSlider;

View File

@ -0,0 +1,62 @@
import { Flex } from '@chakra-ui/react';
import { RootState } from '../../app/store';
import { useAppDispatch, useAppSelector } from '../../app/hooks';
import { setCfgScale, setSampler, setSteps, SDState } from '../sd/sdSlice';
import SDNumberInput from '../../components/SDNumberInput';
import SDSelect from '../../components/SDSelect';
import { SAMPLERS } from '../../app/constants';
import { createSelector } from '@reduxjs/toolkit';
import { isEqual } from 'lodash';
/**
 * Selects the sampler-related fields of the `sd` slice.
 * The memoized result is compared with lodash `isEqual`.
 */
const sdSelector = createSelector(
  (state: RootState) => state.sd,
  ({ steps, cfgScale, sampler }: SDState) => ({ steps, cfgScale, sampler }),
  { memoizeOptions: { resultEqualityCheck: isEqual } }
);
/**
 * Sampler panel: step count, CFG scale and sampler choice (from SAMPLERS).
 * Numeric inputs are converted with `Number` before being dispatched.
 */
const SamplerOptions = () => {
  const samplerSettings = useAppSelector(sdSelector);
  const dispatch = useAppDispatch();

  return (
    <Flex gap={2} direction={'column'}>
      <SDNumberInput
        label='Steps'
        min={1}
        step={1}
        precision={0}
        onChange={(nextSteps) => dispatch(setSteps(Number(nextSteps)))}
        value={samplerSettings.steps}
      />
      <SDNumberInput
        label='CFG scale'
        step={0.5}
        onChange={(nextScale) => dispatch(setCfgScale(Number(nextScale)))}
        value={samplerSettings.cfgScale}
      />
      <SDSelect
        label='Sampler'
        value={samplerSettings.sampler}
        onChange={({ target }) => dispatch(setSampler(target.value))}
        validValues={SAMPLERS}
      />
    </Flex>
  );
};

export default SamplerOptions;

View File

@ -0,0 +1,144 @@
import {
Flex,
Input,
HStack,
FormControl,
FormLabel,
Text,
Button,
} from '@chakra-ui/react';
import { createSelector } from '@reduxjs/toolkit';
import { isEqual } from 'lodash';
import { NUMPY_RAND_MAX, NUMPY_RAND_MIN } from '../../app/constants';
import { useAppDispatch, useAppSelector } from '../../app/hooks';
import { RootState } from '../../app/store';
import SDNumberInput from '../../components/SDNumberInput';
import SDSwitch from '../../components/SDSwitch';
import {
randomizeSeed,
SDState,
setIterations,
setSeed,
setSeedWeights,
setShouldGenerateVariations,
setShouldRandomizeSeed,
setVariantAmount,
} from './sdSlice';
import { validateSeedWeights } from './util/seedWeightPairs';
/**
 * Selects the seed, iteration and variation fields of the `sd` slice.
 * The memoized result is compared with lodash `isEqual`.
 */
const sdSelector = createSelector(
  (state: RootState) => state.sd,
  ({
    variantAmount,
    seedWeights,
    shouldGenerateVariations,
    shouldRandomizeSeed,
    seed,
    iterations,
  }: SDState) => ({
    variantAmount,
    seedWeights,
    shouldGenerateVariations,
    shouldRandomizeSeed,
    seed,
    iterations,
  }),
  { memoizeOptions: { resultEqualityCheck: isEqual } }
);
/**
 * Seed & variation panel.
 *
 * Holds the image count, seed controls (manual value, shuffle, randomize on
 * generation) and the variation controls (amount and seed weights). The
 * variation inputs are disabled unless "Generate variations" is switched on.
 */
const SeedVariationOptions = () => {
  const options = useAppSelector(sdSelector);
  const dispatch = useAppDispatch();

  const { shouldGenerateVariations, shouldRandomizeSeed, seedWeights } =
    options;

  // Seed weights are acceptable when they validate or the field is empty.
  const areSeedWeightsInvalid =
    shouldGenerateVariations &&
    !(validateSeedWeights(seedWeights) || seedWeights === '');

  return (
    <Flex gap={2} direction={'column'}>
      <SDNumberInput
        label='Images to generate'
        step={1}
        min={1}
        precision={0}
        onChange={(count) => dispatch(setIterations(Number(count)))}
        value={options.iterations}
      />
      <SDSwitch
        label='Randomize seed on generation'
        isChecked={shouldRandomizeSeed}
        onChange={({ target }) =>
          dispatch(setShouldRandomizeSeed(target.checked))
        }
      />
      <Flex gap={2}>
        <SDNumberInput
          label='Seed'
          step={1}
          precision={0}
          flexGrow={1}
          min={NUMPY_RAND_MIN}
          max={NUMPY_RAND_MAX}
          isDisabled={shouldRandomizeSeed}
          // A negative seed is flagged only while variations are enabled.
          isInvalid={options.seed < 0 && shouldGenerateVariations}
          onChange={(nextSeed) => dispatch(setSeed(Number(nextSeed)))}
          value={options.seed}
        />
        <Button
          size={'sm'}
          isDisabled={shouldRandomizeSeed}
          onClick={() => dispatch(randomizeSeed())}
        >
          <Text pl={2} pr={2}>
            Shuffle
          </Text>
        </Button>
      </Flex>
      <SDSwitch
        label='Generate variations'
        isChecked={shouldGenerateVariations}
        width={'auto'}
        onChange={({ target }) =>
          dispatch(setShouldGenerateVariations(target.checked))
        }
      />
      <SDNumberInput
        label='Variation amount'
        value={options.variantAmount}
        step={0.01}
        min={0}
        max={1}
        isDisabled={!shouldGenerateVariations}
        onChange={(amount) => dispatch(setVariantAmount(Number(amount)))}
      />
      <FormControl
        isInvalid={areSeedWeightsInvalid}
        flexGrow={1}
        isDisabled={!shouldGenerateVariations}
      >
        <HStack>
          <FormLabel marginInlineEnd={0} marginBottom={1}>
            <Text whiteSpace='nowrap'>
              Seed Weights
            </Text>
          </FormLabel>
          <Input
            size={'sm'}
            value={seedWeights}
            onChange={({ target }) => dispatch(setSeedWeights(target.value))}
          />
        </HStack>
      </FormControl>
    </Flex>
  );
};

export default SeedVariationOptions;

View File

@ -0,0 +1,92 @@
import {
Flex,
FormControl,
FormLabel,
HStack,
Input,
Text,
} from '@chakra-ui/react';
import { createSelector } from '@reduxjs/toolkit';
import { isEqual } from 'lodash';
import { useAppDispatch, useAppSelector } from '../../app/hooks';
import { RootState } from '../../app/store';
import SDNumberInput from '../../components/SDNumberInput';
import SDSwitch from '../../components/SDSwitch';
import {
SDState,
setSeedWeights,
setShouldGenerateVariations,
setVariantAmount,
} from './sdSlice';
import { validateSeedWeights } from './util/seedWeightPairs';
/**
 * Selects the variation-related fields of the `sd` slice.
 * The memoized result is compared with lodash `isEqual`.
 */
const sdSelector = createSelector(
  (state: RootState) => state.sd,
  ({ variantAmount, seedWeights, shouldGenerateVariations }: SDState) => ({
    variantAmount,
    seedWeights,
    shouldGenerateVariations,
  }),
  { memoizeOptions: { resultEqualityCheck: isEqual } }
);
/**
 * Single-row variation controls: the enable switch, the variation amount and
 * the seed-weights text field. The amount and weights inputs are disabled
 * while variations are switched off.
 */
const Variant = () => {
  const variation = useAppSelector(sdSelector);
  const dispatch = useAppDispatch();

  const { shouldGenerateVariations, seedWeights } = variation;

  // Seed weights are acceptable when they validate or the field is empty.
  const areSeedWeightsInvalid =
    shouldGenerateVariations &&
    !(validateSeedWeights(seedWeights) || seedWeights === '');

  return (
    <Flex gap={2} alignItems={'center'} pl={1}>
      <SDSwitch
        label='Generate variations'
        isChecked={shouldGenerateVariations}
        width={'auto'}
        onChange={({ target }) =>
          dispatch(setShouldGenerateVariations(target.checked))
        }
      />
      <SDNumberInput
        label='Amount'
        value={variation.variantAmount}
        step={0.01}
        min={0}
        max={1}
        width={240}
        isDisabled={!shouldGenerateVariations}
        onChange={(amount) => dispatch(setVariantAmount(Number(amount)))}
      />
      <FormControl
        isInvalid={areSeedWeightsInvalid}
        flexGrow={1}
        isDisabled={!shouldGenerateVariations}
      >
        <HStack>
          <FormLabel marginInlineEnd={0} marginBottom={1}>
            <Text fontSize={'sm'} whiteSpace='nowrap'>
              Seed Weights
            </Text>
          </FormLabel>
          <Input
            size={'sm'}
            value={seedWeights}
            onChange={({ target }) => dispatch(setSeedWeights(target.value))}
          />
        </HStack>
      </FormControl>
    </Flex>
  );
};

export default Variant;

View File

@ -0,0 +1,283 @@
import { createSlice } from '@reduxjs/toolkit';
import type { PayloadAction } from '@reduxjs/toolkit';
import { SDMetadata } from '../gallery/gallerySlice';
import randomInt from './util/randomInt';
import { NUMPY_RAND_MAX, NUMPY_RAND_MIN } from '../../app/constants';
/**
 * Returns the step count to report as "real" steps.
 *
 * @param steps - Requested number of steps.
 * @param strength - Strength factor applied when an initial image is in use.
 * @param hasInitImage - Whether an initial image is present.
 * @returns `steps` unchanged without an initial image, otherwise
 *   `strength * steps` rounded down.
 */
const calculateRealSteps = (
  steps: number,
  strength: number,
  hasInitImage: boolean
): number => {
  if (!hasInitImage) {
    return steps;
  }
  return Math.floor(strength * steps);
};
/** Upscaling levels the `upscalingLevel` field accepts. */
export type UpscalingLevel = 0 | 2 | 3 | 4;
/** Shape of the `sd` slice: every generation parameter the UI can edit. */
export interface SDState {
prompt: string;
iterations: number;
steps: number;
// Derived by calculateRealSteps from steps, img2imgStrength and whether
// initialImagePath is set; the reducers keep it up to date.
realSteps: number;
cfgScale: number;
height: number;
width: number;
sampler: string;
// resetSeed sets this to -1; randomizeSeed draws a new value.
seed: number;
img2imgStrength: number;
gfpganStrength: number;
upscalingLevel: UpscalingLevel;
upscalingStrength: number;
shouldUseInitImage: boolean;
// Empty string means no initial image (see initialSDState).
initialImagePath: string;
maskPath: string;
seamless: boolean;
shouldFitToWidthHeight: boolean;
shouldGenerateVariations: boolean;
variantAmount: number;
// Raw text of the seed-weights input; the UI checks it with validateSeedWeights.
seedWeights: string;
shouldRunESRGAN: boolean;
shouldRunGFPGAN: boolean;
shouldRandomizeSeed: boolean;
}
// Default values for the sd slice; resetSDState restores exactly these.
const initialSDState: SDState = {
prompt: '',
iterations: 1,
steps: 50,
// Matches steps because no initial image is set by default.
realSteps: 50,
cfgScale: 7.5,
height: 512,
width: 512,
sampler: 'k_lms',
seed: 0,
seamless: false,
shouldUseInitImage: false,
img2imgStrength: 0.75,
initialImagePath: '',
maskPath: '',
shouldFitToWidthHeight: true,
shouldGenerateVariations: false,
variantAmount: 0.1,
seedWeights: '',
shouldRunESRGAN: false,
upscalingLevel: 4,
upscalingStrength: 0.75,
shouldRunGFPGAN: false,
gfpganStrength: 0.8,
shouldRandomizeSeed: true,
};
// Initial state handed to createSlice below.
const initialState: SDState = initialSDState;
/**
 * Redux slice holding every generation parameter.
 *
 * Invariant maintained by the reducers: `realSteps` always equals
 * `calculateRealSteps(steps, img2imgStrength, Boolean(initialImagePath))`.
 * Every reducer that can change one of those three inputs recomputes it.
 */
export const sdSlice = createSlice({
  name: 'sd',
  initialState,
  reducers: {
    setPrompt: (state, action: PayloadAction<string>) => {
      state.prompt = action.payload;
    },
    setIterations: (state, action: PayloadAction<number>) => {
      state.iterations = action.payload;
    },
    setSteps: (state, action: PayloadAction<number>) => {
      const { img2imgStrength, initialImagePath } = state;
      const steps = action.payload;
      state.steps = steps;
      state.realSteps = calculateRealSteps(
        steps,
        img2imgStrength,
        Boolean(initialImagePath)
      );
    },
    setCfgScale: (state, action: PayloadAction<number>) => {
      state.cfgScale = action.payload;
    },
    setHeight: (state, action: PayloadAction<number>) => {
      state.height = action.payload;
    },
    setWidth: (state, action: PayloadAction<number>) => {
      state.width = action.payload;
    },
    setSampler: (state, action: PayloadAction<string>) => {
      state.sampler = action.payload;
    },
    // Setting a seed explicitly turns off per-generation randomisation.
    setSeed: (state, action: PayloadAction<number>) => {
      state.seed = action.payload;
      state.shouldRandomizeSeed = false;
    },
    setImg2imgStrength: (state, action: PayloadAction<number>) => {
      const img2imgStrength = action.payload;
      const { steps, initialImagePath } = state;
      state.img2imgStrength = img2imgStrength;
      state.realSteps = calculateRealSteps(
        steps,
        img2imgStrength,
        Boolean(initialImagePath)
      );
    },
    setGfpganStrength: (state, action: PayloadAction<number>) => {
      state.gfpganStrength = action.payload;
    },
    setUpscalingLevel: (state, action: PayloadAction<UpscalingLevel>) => {
      state.upscalingLevel = action.payload;
    },
    setUpscalingStrength: (state, action: PayloadAction<number>) => {
      state.upscalingStrength = action.payload;
    },
    setShouldUseInitImage: (state, action: PayloadAction<boolean>) => {
      state.shouldUseInitImage = action.payload;
    },
    // A non-empty path switches image-to-image on; an empty one switches it off.
    setInitialImagePath: (state, action: PayloadAction<string>) => {
      const initialImagePath = action.payload;
      const { steps, img2imgStrength } = state;
      state.shouldUseInitImage = Boolean(initialImagePath);
      state.initialImagePath = initialImagePath;
      state.realSteps = calculateRealSteps(
        steps,
        img2imgStrength,
        Boolean(initialImagePath)
      );
    },
    setMaskPath: (state, action: PayloadAction<string>) => {
      state.maskPath = action.payload;
    },
    setSeamless: (state, action: PayloadAction<boolean>) => {
      state.seamless = action.payload;
    },
    setShouldFitToWidthHeight: (state, action: PayloadAction<boolean>) => {
      state.shouldFitToWidthHeight = action.payload;
    },
    resetSeed: (state) => {
      state.seed = -1;
    },
    randomizeSeed: (state) => {
      state.seed = randomInt(NUMPY_RAND_MIN, NUMPY_RAND_MAX);
    },
    // Sets one field by name and applies the side effects the dedicated
    // setters apply for that field.
    setParameter: (
      state,
      action: PayloadAction<{ key: string; value: string | number | boolean }>
    ) => {
      const { key, value } = action.payload;
      const temp = { ...state, [key]: value };
      if (key === 'seed') {
        temp.shouldRandomizeSeed = false;
      }
      if (key === 'initialImagePath' && value === '') {
        temp.shouldUseInitImage = false;
      }
      // Keep realSteps in sync when one of its inputs is set through this
      // generic path, as setSteps / setImg2imgStrength / setInitialImagePath do.
      if (
        key === 'steps' ||
        key === 'img2imgStrength' ||
        key === 'initialImagePath'
      ) {
        temp.realSteps = calculateRealSteps(
          temp.steps,
          temp.img2imgStrength,
          Boolean(temp.initialImagePath)
        );
      }
      return temp;
    },
    setShouldGenerateVariations: (state, action: PayloadAction<boolean>) => {
      state.shouldGenerateVariations = action.payload;
    },
    setVariantAmount: (state, action: PayloadAction<number>) => {
      state.variantAmount = action.payload;
    },
    setSeedWeights: (state, action: PayloadAction<string>) => {
      state.seedWeights = action.payload;
    },
    // Loads the parameters recorded in an image's metadata.
    setAllParameters: (state, action: PayloadAction<SDMetadata>) => {
      const {
        prompt,
        steps,
        cfgScale,
        height,
        width,
        sampler,
        seed,
        img2imgStrength,
        gfpganStrength,
        upscalingLevel,
        upscalingStrength,
        initialImagePath,
        maskPath,
        seamless,
        shouldFitToWidthHeight,
      } = action.payload;

      // ?? = falsy values ('', 0, etc) are used
      // || = falsy values not used
      state.prompt = prompt ?? state.prompt;
      state.steps = steps || state.steps;
      state.cfgScale = cfgScale || state.cfgScale;
      state.width = width || state.width;
      state.height = height || state.height;
      state.sampler = sampler || state.sampler;
      state.seed = seed ?? state.seed;
      state.seamless = seamless ?? state.seamless;
      state.shouldFitToWidthHeight =
        shouldFitToWidthHeight ?? state.shouldFitToWidthHeight;
      state.img2imgStrength = img2imgStrength ?? state.img2imgStrength;
      state.gfpganStrength = gfpganStrength ?? state.gfpganStrength;
      state.upscalingLevel = upscalingLevel ?? state.upscalingLevel;
      state.upscalingStrength = upscalingStrength ?? state.upscalingStrength;
      state.initialImagePath = initialImagePath ?? state.initialImagePath;
      state.maskPath = maskPath ?? state.maskPath;

      // If the image whose parameters we are using has a seed, disable
      // randomizing the seed. Test for presence, not truthiness: 0 is a
      // real seed and is applied by the `??` assignment above.
      if (seed !== undefined && seed !== null) {
        state.shouldRandomizeSeed = false;
      }

      // if we have a gfpgan strength, enable it
      state.shouldRunGFPGAN = Boolean(gfpganStrength);

      // if we have a esrgan strength, enable it
      state.shouldRunESRGAN = Boolean(upscalingLevel);

      // if we want to recreate an image exactly, we disable variations
      state.shouldGenerateVariations = false;

      state.shouldUseInitImage = Boolean(initialImagePath);

      // steps, strength and init image may all have changed above, so
      // recompute the derived value from the resulting state.
      state.realSteps = calculateRealSteps(
        state.steps,
        state.img2imgStrength,
        Boolean(state.initialImagePath)
      );
    },
    resetSDState: (state) => {
      return {
        ...state,
        ...initialSDState,
      };
    },
    setShouldRunGFPGAN: (state, action: PayloadAction<boolean>) => {
      state.shouldRunGFPGAN = action.payload;
    },
    setShouldRunESRGAN: (state, action: PayloadAction<boolean>) => {
      state.shouldRunESRGAN = action.payload;
    },
    setShouldRandomizeSeed: (state, action: PayloadAction<boolean>) => {
      state.shouldRandomizeSeed = action.payload;
    },
  },
});
// Action creators generated by the slice, re-exported in alphabetical order.
export const {
  randomizeSeed,
  resetSDState,
  resetSeed,
  setAllParameters,
  setCfgScale,
  setGfpganStrength,
  setHeight,
  setImg2imgStrength,
  setInitialImagePath,
  setIterations,
  setMaskPath,
  setParameter,
  setPrompt,
  setSampler,
  setSeamless,
  setSeed,
  setSeedWeights,
  setShouldFitToWidthHeight,
  setShouldGenerateVariations,
  setShouldRandomizeSeed,
  setShouldRunESRGAN,
  setShouldRunGFPGAN,
  setShouldUseInitImage,
  setSteps,
  setUpscalingLevel,
  setUpscalingStrength,
  setVariantAmount,
  setWidth,
} = sdSlice.actions;

// The slice reducer is the module's default export.
export default sdSlice.reducer;

View File

@ -0,0 +1,5 @@
/**
 * Draws a pseudo-random value via `Math.random`.
 *
 * @param min - Lower bound.
 * @param max - Upper bound.
 * @returns `Math.floor(Math.random() * (max - min + 1) + min)`; for integer
 *   bounds this is an integer in `[min, max]`, both ends included.
 */
const randomInt = (min: number, max: number): number => {
  // +1 makes `max` itself reachable after flooring.
  const span = max - min + 1;
  const scaled = Math.random() * span + min;
  return Math.floor(scaled);
};

export default randomInt;

View File

@ -0,0 +1,56 @@
/** A seed together with the weight applied to it. */
export interface SeedWeightPair {
  seed: number;
  weight: number;
}

/** Seed/weight pairs stored as `[seed, weight]` arrays. */
export type SeedWeights = Array<Array<number>>;

/**
 * True when `pair` holds a finite seed and a weight within [0, 1].
 *
 * Checks the numbers directly instead of round-tripping them through
 * `toString()`/`parseInt`, so a pair that is too short (`[]`, `[1]`) is
 * reported as invalid rather than throwing a TypeError.
 */
const isValidSeedWeightPair = (pair: Array<number>): boolean => {
  const [seed, weight] = pair;
  return (
    typeof seed === 'number' &&
    Number.isFinite(seed) &&
    typeof weight === 'number' &&
    weight >= 0 &&
    weight <= 1
  );
};

/**
 * Parses text of the form `seed:weight,seed:weight,...`.
 *
 * @param string - Raw seed-weights text.
 * @returns The parsed pairs, or `false` when any pair is invalid (which
 *   includes the empty string).
 */
export const stringToSeedWeights = (string: string): SeedWeights | boolean => {
  const pairs = string.split(',').map((rawPair) => {
    // Missing parts default to '' so they parse to NaN and fail validation.
    const [rawSeed = '', rawWeight = ''] = rawPair.split(':');
    return [parseInt(rawSeed), parseFloat(rawWeight)];
  });

  return validateSeedWeights(pairs) ? pairs : false;
};

/**
 * Validates seed weights given either as text or as parsed pairs.
 *
 * @param seedWeights - Raw text or `[seed, weight]` pairs.
 * @returns `true` when there is at least one pair and every pair has a
 *   finite seed and a weight within [0, 1].
 */
export const validateSeedWeights = (
  seedWeights: SeedWeights | string
): boolean => {
  if (typeof seedWeights === 'string') {
    return Boolean(stringToSeedWeights(seedWeights));
  }

  return seedWeights.length > 0 && seedWeights.every(isValidSeedWeightPair);
};

/**
 * Serialises pairs back to `seed:weight,seed:weight,...`.
 *
 * @param seedWeights - Pairs to serialise.
 * @returns The joined text, or `false` when the pairs do not validate.
 */
export const seedWeightsToString = (
  seedWeights: SeedWeights
): string | boolean => {
  if (!validateSeedWeights(seedWeights)) {
    return false;
  }

  return seedWeights.map(([seed, weight]) => `${seed}:${weight}`).join(',');
};

View File

@ -0,0 +1,125 @@
import {
IconButton,
useColorModeValue,
Flex,
Text,
Tooltip,
} from '@chakra-ui/react';
import { useAppDispatch, useAppSelector } from '../../app/hooks';
import { RootState } from '../../app/store';
import { setShouldShowLogViewer, SystemState } from './systemSlice';
import { useLayoutEffect, useRef, useState } from 'react';
import { FaAngleDoubleDown, FaCode, FaMinus } from 'react-icons/fa';
import { createSelector } from '@reduxjs/toolkit';
import { isEqual } from 'lodash';
/**
 * Selects the log entries. Two results are treated as equal when they have
 * the same length, so only a change in entry count yields a new result.
 */
const logSelector = createSelector(
  (state: RootState) => state.system,
  ({ log }: SystemState) => log,
  {
    memoizeOptions: {
      resultEqualityCheck: (previous, next) => previous.length === next.length,
    },
  }
);
/**
 * Selects whether the log viewer should be shown.
 * The memoized result is compared with lodash `isEqual`.
 */
const systemSelector = createSelector(
  (state: RootState) => state.system,
  ({ shouldShowLogViewer }: SystemState) => ({ shouldShowLogViewer }),
  { memoizeOptions: { resultEqualityCheck: isEqual } }
);
/**
 * Fixed bottom panel listing the log entries, with two fixed buttons:
 * one toggles the panel, the other (visible only while the panel is shown)
 * toggles autoscroll.
 */
const LogViewer = () => {
  const dispatch = useAppDispatch();
  const bg = useColorModeValue('gray.50', 'gray.900');
  const borderColor = useColorModeValue('gray.500', 'gray.500');
  const [shouldAutoscroll, setShouldAutoscroll] = useState<boolean>(true);
  const log = useAppSelector(logSelector);
  const { shouldShowLogViewer } = useAppSelector(systemSelector);
  const viewerRef = useRef<HTMLDivElement>(null);

  // No dependency list: runs after every render and, while autoscroll is on,
  // pins the panel to its bottom edge.
  useLayoutEffect(() => {
    const viewer = viewerRef.current;
    if (viewer === null || !shouldAutoscroll) {
      return;
    }
    viewer.scrollTop = viewer.scrollHeight;
  });

  const autoscrollLabel = shouldAutoscroll
    ? 'Autoscroll on'
    : 'Autoscroll off';
  const toggleLabel = shouldShowLogViewer ? 'Hide logs' : 'Show logs';
  const toggleIcon = shouldShowLogViewer ? <FaMinus /> : <FaCode />;

  return (
    <>
      {shouldShowLogViewer && (
        <Flex
          position={'fixed'}
          left={0}
          bottom={0}
          height='200px'
          width='100vw'
          overflow='auto'
          direction='column'
          fontFamily='monospace'
          fontSize='sm'
          pl={12}
          pr={2}
          pb={2}
          borderTopWidth='4px'
          borderColor={borderColor}
          background={bg}
          ref={viewerRef}
        >
          {log.map(({ timestamp, message }, index) => (
            <Flex gap={2} key={index}>
              <Text fontSize='sm' fontWeight={'semibold'}>
                {timestamp}:
              </Text>
              <Text fontSize='sm' wordBreak={'break-all'}>
                {message}
              </Text>
            </Flex>
          ))}
        </Flex>
      )}
      {shouldShowLogViewer && (
        <Tooltip label={autoscrollLabel}>
          <IconButton
            size='sm'
            position={'fixed'}
            left={2}
            bottom={12}
            aria-label='Toggle autoscroll'
            variant={'solid'}
            colorScheme={shouldAutoscroll ? 'blue' : 'gray'}
            icon={<FaAngleDoubleDown />}
            onClick={() => setShouldAutoscroll(!shouldAutoscroll)}
          />
        </Tooltip>
      )}
      <Tooltip label={toggleLabel}>
        <IconButton
          size='sm'
          position={'fixed'}
          left={2}
          bottom={2}
          variant={'solid'}
          aria-label='Toggle Log Viewer'
          icon={toggleIcon}
          onClick={() =>
            dispatch(setShouldShowLogViewer(!shouldShowLogViewer))
          }
        />
      </Tooltip>
    </>
  );
};

export default LogViewer;

View File

@ -0,0 +1,170 @@
import {
Flex,
FormControl,
FormLabel,
Heading,
HStack,
Modal,
ModalBody,
ModalCloseButton,
ModalContent,
ModalFooter,
ModalHeader,
ModalOverlay,
Switch,
Text,
useDisclosure,
} from '@chakra-ui/react';
import { useAppDispatch, useAppSelector } from '../../app/hooks';
import {
setShouldConfirmOnDelete,
setShouldDisplayInProgress,
SystemState,
} from './systemSlice';
import { RootState } from '../../app/store';
import SDButton from '../../components/SDButton';
import { persistor } from '../../main';
import { createSelector } from '@reduxjs/toolkit';
import { isEqual } from 'lodash';
import { cloneElement, ReactElement } from 'react';
/**
 * Selects the two user preferences shown in the settings modal.
 * The memoized result is compared with lodash `isEqual`.
 */
const systemSelector = createSelector(
  (state: RootState) => state.system,
  (system: SystemState) => ({
    shouldDisplayInProgress: system.shouldDisplayInProgress,
    shouldConfirmOnDelete: system.shouldConfirmOnDelete,
  }),
  {
    memoizeOptions: { resultEqualityCheck: isEqual },
  }
);
/** Props for SettingsModal. */
type Props = {
// Trigger element; SettingsModal clones it and attaches the onClick that opens the modal.
children: ReactElement;
};
/**
 * Settings dialog opened by clicking the supplied child element.
 *
 * It exposes two preference switches and a "Reset Web UI" action. Resetting
 * purges the persisted store, closes the settings dialog and opens a second
 * modal (not dismissable by clicking the overlay) asking the user to refresh.
 */
const SettingsModal = ({ children }: Props) => {
  const settingsModal = useDisclosure();
  const refreshModal = useDisclosure();

  const preferences = useAppSelector(systemSelector);
  const dispatch = useAppDispatch();

  const handleClickResetWebUI = () => {
    persistor.purge().then(() => {
      settingsModal.onClose();
      refreshModal.onOpen();
    });
  };

  return (
    <>
      {/* The trigger: the caller's element with the open handler attached. */}
      {cloneElement(children, {
        onClick: settingsModal.onOpen,
      })}

      <Modal isOpen={settingsModal.isOpen} onClose={settingsModal.onClose}>
        <ModalOverlay />
        <ModalContent>
          <ModalHeader>Settings</ModalHeader>
          <ModalCloseButton />
          <ModalBody>
            <Flex gap={5} direction='column'>
              <FormControl>
                <HStack>
                  <FormLabel marginBottom={1}>
                    Display in-progress images (slower)
                  </FormLabel>
                  <Switch
                    isChecked={preferences.shouldDisplayInProgress}
                    onChange={({ target }) =>
                      dispatch(setShouldDisplayInProgress(target.checked))
                    }
                  />
                </HStack>
              </FormControl>

              <FormControl>
                <HStack>
                  <FormLabel marginBottom={1}>
                    Confirm on delete
                  </FormLabel>
                  <Switch
                    isChecked={preferences.shouldConfirmOnDelete}
                    onChange={({ target }) =>
                      dispatch(setShouldConfirmOnDelete(target.checked))
                    }
                  />
                </HStack>
              </FormControl>

              <Heading size={'md'}>Reset Web UI</Heading>

              <Text>
                Resetting the web UI only resets the browser's
                local cache of your images and remembered
                settings. It does not delete any images from
                disk.
              </Text>

              <Text>
                If images aren't showing up in the gallery or
                something else isn't working, please try
                resetting before submitting an issue on GitHub.
              </Text>

              <SDButton
                label='Reset Web UI'
                colorScheme='red'
                onClick={handleClickResetWebUI}
              />
            </Flex>
          </ModalBody>

          <ModalFooter>
            <SDButton
              label='Close'
              onClick={settingsModal.onClose}
            />
          </ModalFooter>
        </ModalContent>
      </Modal>

      <Modal
        closeOnOverlayClick={false}
        isOpen={refreshModal.isOpen}
        onClose={refreshModal.onClose}
        isCentered
      >
        <ModalOverlay bg='blackAlpha.300' backdropFilter='blur(40px)' />
        <ModalContent>
          <ModalBody pb={6} pt={6}>
            <Flex justifyContent={'center'}>
              <Text fontSize={'lg'}>
                Web UI has been reset. Refresh the page to
                reload.
              </Text>
            </Flex>
          </ModalBody>
        </ModalContent>
      </Modal>
    </>
  );
};

export default SettingsModal;

View File

@ -0,0 +1,98 @@
import { createSlice } from '@reduxjs/toolkit';
import type { PayloadAction } from '@reduxjs/toolkit';
import dateFormat from 'dateformat';
import { ExpandedIndex } from '@chakra-ui/react';
/** One line of the in-app log. */
export interface LogEntry {
// Formatted by addLogEntry with dateFormat's 'isoDateTime' mask.
timestamp: string;
message: string;
}
/** Number-indexed collection of log entries. */
export interface Log {
[index: number]: LogEntry;
}
/** Shape of the `system` slice: connection, processing and UI preference state. */
export interface SystemState {
shouldDisplayInProgress: boolean;
isProcessing: boolean;
// Reset to 0 by setIsProcessing when processing is switched off.
currentStep: number;
// Appended to by addLogEntry.
log: Array<LogEntry>;
shouldShowLogViewer: boolean;
isGFPGANAvailable: boolean;
isESRGANAvailable: boolean;
isConnected: boolean;
socketId: string;
shouldConfirmOnDelete: boolean;
// Chakra ExpandedIndex value written by setOpenAccordions.
openAccordions: ExpandedIndex;
}
// Default values for the system slice.
const initialSystemState = {
isConnected: false,
isProcessing: false,
currentStep: 0,
log: [],
shouldShowLogViewer: false,
shouldDisplayInProgress: false,
isGFPGANAvailable: true,
isESRGANAvailable: true,
socketId: '',
shouldConfirmOnDelete: true,
// Index 0 only, i.e. just the first accordion item.
openAccordions: [0],
};
// Typed alias handed to createSlice; the annotation checks the literal above against SystemState.
const initialState: SystemState = initialSystemState;
/**
 * Redux slice for application-level state: connection and processing
 * status, the log, and UI preferences.
 */
export const systemSlice = createSlice({
  name: 'system',
  initialState,
  reducers: {
    setShouldDisplayInProgress: (
      state,
      { payload }: PayloadAction<boolean>
    ) => {
      state.shouldDisplayInProgress = payload;
    },
    // Switching processing off also resets the step counter.
    setIsProcessing: (state, { payload }: PayloadAction<boolean>) => {
      state.isProcessing = payload;
      if (payload === false) {
        state.currentStep = 0;
      }
    },
    setCurrentStep: (state, { payload }: PayloadAction<number>) => {
      state.currentStep = payload;
    },
    // Appends a message stamped with the current time.
    addLogEntry: (state, { payload }: PayloadAction<string>) => {
      const timestamp = dateFormat(new Date(), 'isoDateTime');
      const entry: LogEntry = { timestamp, message: payload };
      state.log.push(entry);
    },
    setShouldShowLogViewer: (state, { payload }: PayloadAction<boolean>) => {
      state.shouldShowLogViewer = payload;
    },
    setIsConnected: (state, { payload }: PayloadAction<boolean>) => {
      state.isConnected = payload;
    },
    setSocketId: (state, { payload }: PayloadAction<string>) => {
      state.socketId = payload;
    },
    setShouldConfirmOnDelete: (state, { payload }: PayloadAction<boolean>) => {
      state.shouldConfirmOnDelete = payload;
    },
    setOpenAccordions: (state, { payload }: PayloadAction<ExpandedIndex>) => {
      state.openAccordions = payload;
    },
  },
});
// Action creators generated by the slice, re-exported in alphabetical order.
export const {
  addLogEntry,
  setCurrentStep,
  setIsConnected,
  setIsProcessing,
  setOpenAccordions,
  setShouldConfirmOnDelete,
  setShouldDisplayInProgress,
  setShouldShowLogViewer,
  setSocketId,
} = systemSlice.actions;

// The slice reducer is the module's default export.
export default systemSlice.reducer;

View File

@ -0,0 +1,108 @@
import { createSelector } from '@reduxjs/toolkit';
import { isEqual } from 'lodash';
import { useMemo } from 'react';
import { useAppSelector } from '../../app/hooks';
import { RootState } from '../../app/store';
import { SDState } from '../sd/sdSlice';
import { validateSeedWeights } from '../sd/util/seedWeightPairs';
import { SystemState } from './systemSlice';
/**
 * Selects the `sd` fields the readiness check inspects.
 * The memoized result is compared with lodash `isEqual`.
 */
const sdSelector = createSelector(
  (state: RootState) => state.sd,
  ({
    prompt,
    shouldGenerateVariations,
    seedWeights,
    maskPath,
    initialImagePath,
    seed,
  }: SDState) => ({
    prompt,
    shouldGenerateVariations,
    seedWeights,
    maskPath,
    initialImagePath,
    seed,
  }),
  { memoizeOptions: { resultEqualityCheck: isEqual } }
);
/**
 * Selects the processing and connection flags the readiness check inspects.
 * The memoized result is compared with lodash `isEqual`.
 */
const systemSelector = createSelector(
  (state: RootState) => state.system,
  ({ isProcessing, isConnected }: SystemState) => ({
    isProcessing,
    isConnected,
  }),
  { memoizeOptions: { resultEqualityCheck: isEqual } }
);
/**
 * Checks relevant pieces of state to confirm generation will not
 * deterministically fail. This is used to prevent the 'Generate' button from
 * being clicked. Other parameter values may cause failure but we rely on
 * input validation for those.
 *
 * @returns `true` when every check below passes, otherwise `false`.
 */
const useCheckParameters = () => {
  const sd = useAppSelector(sdSelector);
  const { isProcessing, isConnected } = useAppSelector(systemSelector);

  const {
    prompt,
    shouldGenerateVariations,
    seedWeights,
    maskPath,
    initialImagePath,
    seed,
  } = sd;

  return useMemo(
    () =>
      // Cannot generate without a prompt
      Boolean(prompt) &&
      // Cannot generate with a mask without img2img
      !(maskPath && !initialImagePath) &&
      // TODO: job queue
      // Cannot generate if already processing an image
      !isProcessing &&
      // Cannot generate if not connected
      isConnected &&
      // Cannot generate variations without valid (or empty) seed weights
      // and a seed other than -1
      !(
        shouldGenerateVariations &&
        (!(validateSeedWeights(seedWeights) || seedWeights === '') ||
          seed === -1)
      ),
    [
      prompt,
      maskPath,
      initialImagePath,
      isProcessing,
      isConnected,
      shouldGenerateVariations,
      seedWeights,
      seed,
    ]
  );
};

export default useCheckParameters;

26
frontend/src/main.tsx Normal file
View File

@ -0,0 +1,26 @@
import React from 'react';
import ReactDOM from 'react-dom/client';
import { ChakraProvider, ColorModeScript } from '@chakra-ui/react';
import { store } from './app/store';
import { Provider } from 'react-redux';
import { PersistGate } from 'redux-persist/integration/react';
import { persistStore } from 'redux-persist';
import App from './App';
import { theme } from './app/theme';
import Loading from './Loading';

// Created after all imports (imports are hoisted anyway, so this runs at the
// same point as before). Exported so other modules can reach the persistor.
export const persistor = persistStore(store);

// Application entry point: mount the React tree on the `#root` element.
// The cast reflects that the host HTML page is expected to provide `#root`.
const rootElement = document.getElementById('root') as HTMLElement;

ReactDOM.createRoot(rootElement).render(
  <React.StrictMode>
    <Provider store={store}>
      {/* <Loading /> is the PersistGate `loading` fallback */}
      <PersistGate loading={<Loading />} persistor={persistor}>
        <ChakraProvider theme={theme}>
          <ColorModeScript initialColorMode={theme.config.initialColorMode} />
          <App />
        </ChakraProvider>
      </PersistGate>
    </Provider>
  </React.StrictMode>
);

1
frontend/src/vite-env.d.ts vendored Normal file
View File

@ -0,0 +1 @@
/// <reference types="vite/client" />

21
frontend/tsconfig.json Normal file
View File

@ -0,0 +1,21 @@
{
"compilerOptions": {
"target": "ESNext",
"useDefineForClassFields": true,
"lib": ["DOM", "DOM.Iterable", "ESNext"],
"allowJs": false,
"skipLibCheck": true,
"esModuleInterop": false,
"allowSyntheticDefaultImports": true,
"strict": true,
"forceConsistentCasingInFileNames": true,
"module": "ESNext",
"moduleResolution": "Node",
"resolveJsonModule": true,
"isolatedModules": true,
"noEmit": true,
"jsx": "react-jsx"
},
"include": ["src", "index.d.ts"],
"references": [{ "path": "./tsconfig.node.json" }]
}

View File

@ -0,0 +1,9 @@
{
"compilerOptions": {
"composite": true,
"module": "ESNext",
"moduleResolution": "Node",
"allowSyntheticDefaultImports": true
},
"include": ["vite.config.ts"]
}

36
frontend/vite.config.ts Normal file
View File

@ -0,0 +1,36 @@
import { defineConfig } from 'vite';
import react from '@vitejs/plugin-react';
import eslint from 'vite-plugin-eslint';
// https://vitejs.dev/config/
/**
 * Vite configuration factory.
 *
 * Builds one shared configuration and returns it for every mode. The
 * `development` branch exists only as the place to switch on source maps
 * (currently commented out), so both branches presently yield equivalent
 * settings.
 */
export default defineConfig(({ mode }) => {
  const common = {
    plugins: [react(), eslint()],
    server: {
      proxy: {
        // Forward `/outputs/...` to http://localhost:9090/outputs. The target
        // already ends in `/outputs`, so the prefix is stripped from the
        // request path to avoid doubling it.
        '/outputs': {
          target: 'http://localhost:9090/outputs',
          changeOrigin: true,
          rewrite: (path: string) => path.replace(/^\/outputs/, ''),
        },
      },
    },
    build: {
      target: 'esnext',
      chunkSizeWarningLimit: 1500, // we don't really care about chunk size
    },
  };

  if (mode === 'development') {
    return {
      ...common,
      build: {
        ...common.build,
        // sourcemap: true, // this can be enabled if needed, it adds over 15MB to the commit
      },
    };
  }

  return {
    ...common,
  };
});

3149
frontend/yarn.lock Normal file

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More