mirror of https://github.com/invoke-ai/InvokeAI
synced 2024-08-30 20:32:17 +00:00

commit 3a2be621f3: Merge branch 'development' into main
@ -320,3 +320,20 @@ something that depends on it-- Rosetta can translate some Intel instructions but
not the specialized ones here. To avoid this, make sure to use the environment
variable `CONDA_SUBDIR=osx-arm64`, which restricts the Conda environment to only
use ARM packages, and use `nomkl` as described above.

### input types 'tensor<2x1280xf32>' and 'tensor<*xf16>' are not broadcast compatible

This error may appear just as generation starts, e.g.:

```
dream> clouds
Generating: 0%| | 0/1 [00:00<?, ?it/s]/Users/[...]/dev/stable-diffusion/ldm/modules/embedding_manager.py:152: UserWarning: The operator 'aten::nonzero' is not currently supported on the MPS backend and will fall back to run on the CPU. This may have performance implications. (Triggered internally at /Users/runner/work/_temp/anaconda/conda-bld/pytorch_1662016319283/work/aten/src/ATen/mps/MPSFallback.mm:11.)
  placeholder_idx = torch.where(
loc("mps_add"("(mpsFileLoc): /AppleInternal/Library/BuildRoots/20d6c351-ee94-11ec-bcaf-7247572f23b4/Library/Caches/com.apple.xbs/Sources/MetalPerformanceShadersGraph/mpsgraph/MetalPerformanceShadersGraph/Core/Files/MPSGraphUtilities.mm":219:0)): error: input types 'tensor<2x1280xf32>' and 'tensor<*xf16>' are not broadcast compatible
LLVM ERROR: Failed to infer result type(s).
Abort trap: 6
/Users/[...]/opt/anaconda3/envs/ldm/lib/python3.9/multiprocessing/resource_tracker.py:216: UserWarning: resource_tracker: There appear to be 1 leaked semaphore objects to clean up at shutdown
  warnings.warn('resource_tracker: There appear to be %d '
```

Macs do not support autocast/mixed precision. Supply `--full_precision` to use float32 everywhere.
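
As a quick illustration of the workaround (a sketch, not part of this commit; it assumes PyTorch 1.12+ with the MPS backend), the same float32 fallback can be expressed as:

```python
import torch

# MPS has no autocast support, so pin everything to float32 on Apple Silicon.
# This mirrors the T2I change later in this diff, which forces full precision
# whenever choose_torch_device() returns 'mps'.
device = 'mps' if torch.backends.mps.is_available() else 'cpu'
dtype = torch.float32 if device == 'mps' else torch.float16
x = torch.ones(2, 1280, device=device, dtype=dtype)
print(device, x.dtype)
```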
13 README.md
@ -138,6 +138,13 @@ You may also pass a -v<count> option to generate count variants on the original
passing the first generated image back into img2img the requested number of times. It generates interesting
variants.

## Seamless Tiling

The seamless tiling mode causes generated images to tile seamlessly with themselves. To use it, add the --seamless option when starting the script, which makes all generated images tile, or pass it with an individual dream> prompt as shown here:

```
dream> "pond garden with lotus by claude monet" --seamless -s100 -n4
```
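
Under the hood (as the `T2I` changes later in this commit show), seamless mode switches the model's convolution layers to circular padding, so opposite edges of the image wrap around to each other. A minimal sketch of the effect in plain PyTorch (illustrative only, not part of the commit):

```python
import torch
import torch.nn as nn

# With padding_mode='circular', a convolution at the right edge reads pixels
# from the left edge (and likewise top/bottom), so outputs tile seamlessly.
conv = nn.Conv2d(3, 8, kernel_size=3, padding=1, padding_mode='circular')
x = torch.randn(1, 3, 64, 64)
y = conv(x)
print(y.shape)  # torch.Size([1, 8, 64, 64]); spatial size preserved, edges wrap
```

Because every border pixel is computed as if the image continued on the opposite side, the finished image can be repeated edge to edge without visible seams.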

## GFPGAN and Real-ESRGAN Support

The script also provides the ability to do face restoration and

@ -400,7 +407,11 @@ repository and associated paper for details and limitations.

# Latest Changes

- v1.13 (3 September 2022)
- v1.14 (In progress)

- Add "seamless mode" for circular tiling of image. Generates beautiful effects. ([prixt](https://github.com/prixt))

- v1.13 (3 September 2022)

- Support image variations (see [VARIATIONS](VARIATIONS.md)) ([Kevin Gibbons](https://github.com/bakkot) and many contributors and reviewers)
- Supports a Google Colab notebook for a standalone server running on Google hardware [Arturo Mendivil](https://github.com/artmen1516)
@ -108,6 +108,6 @@ the chosen two images. Here's the one I like best:

<img src="static/variation_walkthru/000004.3747154981.png">

As you can see, this is a very powerful too, which when combined with
As you can see, this is a very powerful tool, which when combined with
subprompt weighting, gives you great control over the content and
quality of your generated images.
@ -1,33 +1,29 @@
name: ldm
channels:
  - pytorch-nightly
  - pytorch
  - conda-forge
dependencies:
  - python==3.9.13
  - python==3.10.5
  - pip==22.2.2

  # pytorch-nightly, left unpinned
  # pytorch left unpinned
  - pytorch
  - torchmetrics
  - torchvision

  # I suggest keeping the other deps sorted for convenience.
  # If you wish to upgrade to 3.10, try to run this:
  # To determine what the latest versions should be, run:
  #
  # ```shell
  # CONDA_CMD=conda
  # sed -E 's/python==3.9.13/python==3.10.5/;s/ldm/ldm-3.10/;21,99s/- ([^=]+)==.+/- \1/' environment-mac.yaml > /tmp/environment-mac-updated.yml
  # CONDA_SUBDIR=osx-arm64 $CONDA_CMD env create -f /tmp/environment-mac-updated.yml && $CONDA_CMD list -n ldm-3.10 | awk ' {print "    - " $1 "==" $2;} '
  # sed -E 's/ldm/ldm-updated/;20,99s/- ([^=]+)==.+/- \1/' environment-mac.yaml > environment-mac-updated.yml
  # CONDA_SUBDIR=osx-arm64 conda env create -f environment-mac-updated.yml && conda list -n ldm-updated | awk ' {print "    - " $1 "==" $2;} '
  # ```
  #
  # Unfortunately, as of 2022-08-31, this fails at the pip stage.
  - albumentations==1.2.1
  - coloredlogs==15.0.1
  - einops==0.4.1
  - grpcio==1.46.4
  - humanfriendly
  - imageio-ffmpeg==0.4.7
  - humanfriendly==10.0
  - imageio==2.21.2
  - imageio-ffmpeg==0.4.7
  - imgaug==0.4.0
  - kornia==0.6.7
  - mpmath==1.2.1
@ -43,13 +39,11 @@ dependencies:
  - streamlit==1.12.2
  - sympy==1.10.1
  - tensorboard==2.9.0
  - transformers==4.21.2
  - torchmetrics==0.9.3
  - pip:
    - invisible-watermark
    - test-tube
    - tokenizers
    - torch-fidelity
    - -e git+https://github.com/huggingface/diffusers.git@v0.2.4#egg=diffusers
    - test-tube==0.7.5
    - transformers==4.21.2
    - torch-fidelity==0.3.0
    - -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers
    - -e git+https://github.com/openai/CLIP.git@main#egg=clip
    - -e git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion
@ -59,6 +59,8 @@ class PromptFormatter:
        switches.append(f'-H{opt.height or t2i.height}')
        switches.append(f'-C{opt.cfg_scale or t2i.cfg_scale}')
        switches.append(f'-A{opt.sampler_name or t2i.sampler_name}')
        if opt.seamless or t2i.seamless:
            switches.append(f'--seamless')
        if opt.init_img:
            switches.append(f'-I{opt.init_img}')
        if opt.fit:
@ -76,7 +76,8 @@ class DreamServer(BaseHTTPRequestHandler):
        steps = int(post_data['steps'])
        width = int(post_data['width'])
        height = int(post_data['height'])
        fit = 'fit' in post_data
        seamless = 'seamless' in post_data
        cfgscale = float(post_data['cfgscale'])
        sampler_name = post_data['sampler']
        gfpgan_strength = float(post_data['gfpgan_strength']) if gfpgan_model_exists else 0
@ -92,7 +93,7 @@ class DreamServer(BaseHTTPRequestHandler):
        # across images generated by each call to prompt2img(), so we define it in
        # the outer scope of image_done()
        config = post_data.copy()  # shallow copy
        config['initimg'] = ''
        config['initimg'] = config.pop('initimg_name', '')
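        # keep just the uploaded file's name in the saved config, not the full base64 image payload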

        images_generated = 0  # helps keep track of when upscaling is started
        images_upscaled = 0  # helps keep track of when upscaling is completed
@ -170,6 +171,7 @@ class DreamServer(BaseHTTPRequestHandler):
                gfpgan_strength = gfpgan_strength,
                upscale = upscale,
                sampler_name = sampler_name,
                seamless = seamless,
                step_callback=image_progress,
                image_callback=image_done)
            else:
@ -191,6 +193,7 @@ class DreamServer(BaseHTTPRequestHandler):
                width = width,
                height = height,
                fit = fit,
                seamless = seamless,
                gfpgan_strength=gfpgan_strength,
                upscale = upscale,
                step_callback=image_progress,
@ -14,6 +14,7 @@ from PIL import Image
from tqdm import tqdm, trange
from itertools import islice
from einops import rearrange, repeat
from torch import nn
from torchvision.utils import make_grid
from pytorch_lightning import seed_everything
from torch import autocast
@ -109,6 +110,7 @@ class T2I:
            downsampling_factor
            precision
            strength
            seamless
            embedding_path

    The vast majority of these arguments default to reasonable values.
@ -132,6 +134,7 @@ class T2I:
        precision='autocast',
        full_precision=False,
        strength=0.75,  # default in scripts/img2img.py
        seamless=False,
        embedding_path=None,
        device_type = 'cuda',
        # just to keep track of this parameter when regenerating prompt
@ -153,6 +156,7 @@ class T2I:
        self.precision = precision
        self.full_precision = True if choose_torch_device() == 'mps' else full_precision
        self.strength = strength
        self.seamless = seamless
        self.embedding_path = embedding_path
        self.device_type = device_type
        self.model = None  # empty for now
@ -217,6 +221,7 @@ class T2I:
        step_callback = None,
        width = None,
        height = None,
        seamless = False,
        # these are specific to img2img
        init_img = None,
        fit = False,
@ -240,6 +245,7 @@ class T2I:
        width // width of image, in multiples of 64 (512)
        height // height of image, in multiples of 64 (512)
        cfg_scale // how strongly the prompt influences the image (7.5) (must be >1)
        seamless // whether the generated image should tile seamlessly
        init_img // path to an initial image - its dimensions override width and height
        strength // strength for noising/unnoising init_img. 0.0 preserves image exactly, 1.0 replaces it completely
        gfpgan_strength // strength for GFPGAN. 0.0 preserves image exactly, 1.0 replaces it completely
@ -268,6 +274,7 @@ class T2I:
        steps = steps or self.steps
        width = width or self.width
        height = height or self.height
        seamless = seamless or self.seamless
        cfg_scale = cfg_scale or self.cfg_scale
        ddim_eta = ddim_eta or self.ddim_eta
        iterations = iterations or self.iterations
@ -278,6 +285,10 @@ class T2I:
        model = (
            self.load_model()
        )  # will instantiate the model or return it from cache
        for m in model.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
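                # _orig_padding_mode is stashed on each conv in load_model(), so
                # seamless tiling can be switched on or off for every prompt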
                m.padding_mode = 'circular' if seamless else m._orig_padding_mode

        assert cfg_scale > 1.0, 'CFG_Scale (-C) must be >1.0'
        assert (
            0.0 <= strength <= 1.0
@ -324,7 +335,6 @@ class T2I:
                self.model.encode_first_stage(init_image)
            )  # move to latent space

            print(f' DEBUG: seed at make_image time ={seed}')
            make_image = self._img2img(
                prompt,
                steps=steps,
@ -413,10 +423,7 @@ class T2I:
                        f'>> Error running RealESRGAN - Your image was not upscaled.\n{e}'
                    )
                if image_callback is not None:
                    if save_original:
                        image_callback(image, seed)
                    else:
                        image_callback(image, seed, upscaled=True)
                    image_callback(image, seed, upscaled=True)
                else:  # no callback passed, so we simply replace old image with rescaled one
                    result[0] = image
@ -604,6 +611,10 @@ class T2I:

        self._set_sampler()

        for m in self.model.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
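                # remember each conv's original padding mode so that prompt2image()
                # can flip it to 'circular' and back when toggling seamless mode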
                m._orig_padding_mode = m.padding_mode

        return self.model

    # returns a tensor filled with random numbers from a normal distribution
@ -62,6 +62,7 @@ def main():
        grid = opt.grid,
        # this is solely for recreating the prompt
        latent_diffusion_weights=opt.laion400m,
        seamless=opt.seamless,
        embedding_path=opt.embedding_path,
        device_type=opt.device
    )
@ -87,6 +88,9 @@ def main():
        print(f'{e}. Aborting.')
        sys.exit(-1)

    if opt.seamless:
        print(">> changed to seamless tiling mode")

    # preload the model
    tic = time.time()
    t2i.load_model()
@ -418,6 +422,11 @@ def create_argv_parser():
        default='outputs/img-samples',
        help='Directory to save generated images and a log of prompts and seeds. Default: outputs/img-samples',
    )
    parser.add_argument(
        '--seamless',
        action='store_true',
        help='Change the model to seamless tiling (circular) mode',
    )
    parser.add_argument(
        '--embedding_path',
        type=str,
@ -540,6 +549,11 @@ def create_cmd_parser():
        default=None,
        help='Directory to save generated images and a log of prompts and seeds',
    )
    parser.add_argument(
        '--seamless',
        action='store_true',
        help='Change the model to seamless tiling (circular) mode',
    )
    parser.add_argument(
        '-i',
        '--individual',
@ -37,6 +37,8 @@
        <option value="k_euler_a">KEULER_A</option>
        <option value="k_heun">KHEUN</option>
      </select>
      <input type="checkbox" name="seamless" id="seamless">
      <label for="seamless">Seamless circular tiling</label>
      <br>
      <label title="Set to multiple of 64" for="width">Width:</label>
      <select id="width" name="width" value="512">
@ -64,7 +66,7 @@
      <input value="-1" type="number" id="seed" name="seed">
      <button type="button" id="reset-seed">↺</button>
      <input type="checkbox" name="progress_images" id="progress_images">
      <label for="progress_images">Display in-progress images (slows down generation):</label>
      <label for="progress_images">Display in-progress images (slower)</label>
      <button type="button" id="reset-all">Reset to Defaults</button>
    </div>
    <div id="img2img">
@ -74,7 +76,7 @@
      <label for="strength">Img2Img Strength:</label>
      <input value="0.75" type="number" id="strength" name="strength" step="0.01" min="0" max="1">
      <input type="checkbox" id="fit" name="fit" checked>
      <label title="Rescale image to fit within requested width and height" for="fit">Fit to width/height:</label>
      <label title="Rescale image to fit within requested width and height" for="fit">Fit to width/height</label>
    </div>
    <div id="gfpgan">
      <label title="Strength of the gfpgan (face fixing) algorithm." for="gfpgan_strength">GFPGAN Strength (0 to disable):</label>
@ -19,7 +19,8 @@ function appendOutput(src, seed, config) {
    outputNode.addEventListener('click', () => {
        let form = document.querySelector("#generate-form");
        for (const [k, v] of new FormData(form)) {
            form.querySelector(`*[name=${k}]`).value = config[k];
            if (k == 'initimg') { continue; }
            form.querySelector(`*[name=${k}]`).value = config[k];
        }
        document.querySelector("#seed").value = seed;

@ -59,6 +60,7 @@ async function generateSubmit(form) {

    // Convert file data to base64
    let formData = Object.fromEntries(new FormData(form));
    formData.initimg_name = formData.initimg.name
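    // keep the filename separately; the server stores it in the saved config instead of the base64 payload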
    formData.initimg = formData.initimg.name !== '' ? await toBase64(formData.initimg) : null;

    let strength = formData.strength;