diff --git a/README-Mac-MPS.md b/README-Mac-MPS.md index 04d513cf8c..7f84e3a01e 100644 --- a/README-Mac-MPS.md +++ b/README-Mac-MPS.md @@ -320,3 +320,20 @@ something that depends on it-- Rosetta can translate some Intel instructions but not the specialized ones here. To avoid this, make sure to use the environment variable `CONDA_SUBDIR=osx-arm64`, which restricts the Conda environment to only use ARM packages, and use `nomkl` as described above. + +### input types 'tensor<2x1280xf32>' and 'tensor<*xf16>' are not broadcast compatible + +May appear when just starting to generate, e.g.: + +``` +dream> clouds +Generating: 0%| | 0/1 [00:00<?, ?it/s] [...] error: input types 'tensor<2x1280xf32>' and 'tensor<*xf16>' are not broadcast compatible +LLVM ERROR: Failed to infer result type(s). +Abort trap: 6 +/Users/[...]/opt/anaconda3/envs/ldm/lib/python3.9/multiprocessing/resource_tracker.py:216: UserWarning: resource_tracker: There appear to be 1 leaked semaphore objects to clean up at shutdown + warnings.warn('resource_tracker: There appear to be %d ' + ``` + +Macs do not support autocast/mixed-precision. Supply `--full_precision` to use float32 everywhere. \ No newline at end of file diff --git a/README.md b/README.md index a171a2bea7..435b3b038b 100644 --- a/README.md +++ b/README.md @@ -138,6 +138,13 @@ You may also pass a -v option to generate count variants on the original passing the first generated image back into img2img the requested number of times. It generates interesting variants. +## Seamless Tiling + +The seamless tiling mode causes generated images to tile seamlessly with themselves. To use it, either add the `--seamless` option when starting the script, which makes every generated image tile, or add it to an individual dream> prompt as shown here: +``` +dream> "pond garden with lotus by claude monet" --seamless -s100 -n4 +``` + ## GFPGAN and Real-ESRGAN Support The script also provides the ability to do face restoration and @@ -400,7 +407,11 @@ repository and associated paper for details and limitations. 
# Latest Changes -- v1.13 (3 September 2022) +- v1.14 (In progress) + + - Add "seamless mode" for circular tiling of image. Generates beautiful effects. ([prixt](https://github.com/prixt)) + +- v1.13 (3 September 2022) - Support image variations (see [VARIATIONS](VARIATIONS.md) ([Kevin Gibbons](https://github.com/bakkot) and many contributors and reviewers) - Supports a Google Colab notebook for a standalone server running on Google hardware [Arturo Mendivil](https://github.com/artmen1516) diff --git a/VARIATIONS.md b/VARIATIONS.md index cb42ddfd0e..24cc04cc48 100644 --- a/VARIATIONS.md +++ b/VARIATIONS.md @@ -108,6 +108,6 @@ the chosen two images. Here's the one I like best: -As you can see, this is a very powerful too, which when combined with +As you can see, this is a very powerful tool, which when combined with subprompt weighting, gives you great control over the content and quality of your generated images. diff --git a/environment-mac.yaml b/environment-mac.yaml index 44cd1efcd6..07d7d6a0f0 100644 --- a/environment-mac.yaml +++ b/environment-mac.yaml @@ -1,33 +1,29 @@ name: ldm channels: - - pytorch-nightly + - pytorch - conda-forge dependencies: - - python==3.9.13 + - python==3.10.5 - pip==22.2.2 - # pytorch-nightly, left unpinned + # pytorch left unpinned - pytorch - - torchmetrics - torchvision # I suggest to keep the other deps sorted for convenience. 
- # If you wish to upgrade to 3.10, try to run this: + # To determine what the latest versions should be, run: # # ```shell - # CONDA_CMD=conda - # sed -E 's/python==3.9.13/python==3.10.5/;s/ldm/ldm-3.10/;21,99s/- ([^=]+)==.+/- \1/' environment-mac.yaml > /tmp/environment-mac-updated.yml - # CONDA_SUBDIR=osx-arm64 $CONDA_CMD env create -f /tmp/environment-mac-updated.yml && $CONDA_CMD list -n ldm-3.10 | awk ' {print " - " $1 "==" $2;} ' + # sed -E 's/ldm/ldm-updated/;20,99s/- ([^=]+)==.+/- \1/' environment-mac.yaml > environment-mac-updated.yml + # CONDA_SUBDIR=osx-arm64 conda env create -f environment-mac-updated.yml && conda list -n ldm-updated | awk ' {print " - " $1 "==" $2;} ' # ``` - # - # Unfortunately, as of 2022-08-31, this fails at the pip stage. - albumentations==1.2.1 - coloredlogs==15.0.1 - einops==0.4.1 - grpcio==1.46.4 - - humanfriendly - - imageio-ffmpeg==0.4.7 + - humanfriendly==10.0 - imageio==2.21.2 + - imageio-ffmpeg==0.4.7 - imgaug==0.4.0 - kornia==0.6.7 - mpmath==1.2.1 @@ -43,13 +39,11 @@ dependencies: - streamlit==1.12.2 - sympy==1.10.1 - tensorboard==2.9.0 - - transformers==4.21.2 + - torchmetrics==0.9.3 - pip: - - invisible-watermark - - test-tube - - tokenizers - - torch-fidelity - - -e git+https://github.com/huggingface/diffusers.git@v0.2.4#egg=diffusers + - test-tube==0.7.5 + - transformers==4.21.2 + - torch-fidelity==0.3.0 - -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers - -e git+https://github.com/openai/CLIP.git@main#egg=clip - -e git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion diff --git a/ldm/dream/pngwriter.py b/ldm/dream/pngwriter.py index 2461486b22..b97cc1470c 100644 --- a/ldm/dream/pngwriter.py +++ b/ldm/dream/pngwriter.py @@ -59,6 +59,8 @@ class PromptFormatter: switches.append(f'-H{opt.height or t2i.height}') switches.append(f'-C{opt.cfg_scale or t2i.cfg_scale}') switches.append(f'-A{opt.sampler_name or t2i.sampler_name}') + if opt.seamless or t2i.seamless: + 
switches.append(f'--seamless') if opt.init_img: switches.append(f'-I{opt.init_img}') if opt.fit: diff --git a/ldm/dream/server.py b/ldm/dream/server.py index bba537d6ad..0427cfd56b 100644 --- a/ldm/dream/server.py +++ b/ldm/dream/server.py @@ -76,7 +76,8 @@ class DreamServer(BaseHTTPRequestHandler): steps = int(post_data['steps']) width = int(post_data['width']) height = int(post_data['height']) - fit = 'fit' in post_data + fit = 'fit' in post_data + seamless = 'seamless' in post_data cfgscale = float(post_data['cfgscale']) sampler_name = post_data['sampler'] gfpgan_strength = float(post_data['gfpgan_strength']) if gfpgan_model_exists else 0 @@ -92,7 +93,7 @@ class DreamServer(BaseHTTPRequestHandler): # across images generated by each call to prompt2img(), so we define it in # the outer scope of image_done() config = post_data.copy() # Shallow copy - config['initimg'] = '' + config['initimg'] = config.pop('initimg_name','') images_generated = 0 # helps keep track of when upscaling is started images_upscaled = 0 # helps keep track of when upscaling is completed @@ -170,6 +171,7 @@ class DreamServer(BaseHTTPRequestHandler): gfpgan_strength = gfpgan_strength, upscale = upscale, sampler_name = sampler_name, + seamless = seamless, step_callback=image_progress, image_callback=image_done) else: @@ -191,6 +193,7 @@ class DreamServer(BaseHTTPRequestHandler): width = width, height = height, fit = fit, + seamless = seamless, gfpgan_strength=gfpgan_strength, upscale = upscale, step_callback=image_progress, diff --git a/ldm/simplet2i.py b/ldm/simplet2i.py index 3b4cf6c54d..ed5f83ef82 100644 --- a/ldm/simplet2i.py +++ b/ldm/simplet2i.py @@ -14,6 +14,7 @@ from PIL import Image from tqdm import tqdm, trange from itertools import islice from einops import rearrange, repeat +from torch import nn from torchvision.utils import make_grid from pytorch_lightning import seed_everything from torch import autocast @@ -109,6 +110,7 @@ class T2I: downsampling_factor precision strength + 
seamless embedding_path The vast majority of these arguments default to reasonable values. @@ -132,6 +134,7 @@ class T2I: precision='autocast', full_precision=False, strength=0.75, # default in scripts/img2img.py + seamless=False, embedding_path=None, device_type = 'cuda', # just to keep track of this parameter when regenerating prompt @@ -153,6 +156,7 @@ class T2I: self.precision = precision self.full_precision = True if choose_torch_device() == 'mps' else full_precision self.strength = strength + self.seamless = seamless self.embedding_path = embedding_path self.device_type = device_type self.model = None # empty for now @@ -217,6 +221,7 @@ class T2I: step_callback = None, width = None, height = None, + seamless = False, # these are specific to img2img init_img = None, fit = False, @@ -240,6 +245,7 @@ class T2I: width // width of image, in multiples of 64 (512) height // height of image, in multiples of 64 (512) cfg_scale // how strongly the prompt influences the image (7.5) (must be >1) + seamless // whether the generated image should tile init_img // path to an initial image - its dimensions override width and height strength // strength for noising/unnoising init_img. 0.0 preserves image exactly, 1.0 replaces it completely gfpgan_strength // strength for GFPGAN. 
0.0 preserves image exactly, 1.0 replaces it completely @@ -268,6 +274,7 @@ class T2I: steps = steps or self.steps width = width or self.width height = height or self.height + seamless = seamless or self.seamless cfg_scale = cfg_scale or self.cfg_scale ddim_eta = ddim_eta or self.ddim_eta iterations = iterations or self.iterations @@ -278,6 +285,10 @@ class T2I: model = ( self.load_model() ) # will instantiate the model or return it from cache + for m in model.modules(): + if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)): + m.padding_mode = 'circular' if seamless else m._orig_padding_mode + assert cfg_scale > 1.0, 'CFG_Scale (-C) must be >1.0' assert ( 0.0 <= strength <= 1.0 @@ -324,7 +335,6 @@ class T2I: self.model.encode_first_stage(init_image) ) # move to latent space - print(f' DEBUG: seed at make_image time ={seed}') make_image = self._img2img( prompt, steps=steps, @@ -413,10 +423,7 @@ class T2I: f'>> Error running RealESRGAN - Your image was not upscaled.\n{e}' ) if image_callback is not None: - if save_original: - image_callback(image, seed) - else: - image_callback(image, seed, upscaled=True) + image_callback(image, seed, upscaled=True) else: # no callback passed, so we simply replace old image with rescaled one result[0] = image @@ -604,6 +611,10 @@ class T2I: self._set_sampler() + for m in self.model.modules(): + if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)): + m._orig_padding_mode = m.padding_mode + return self.model # returns a tensor filled with random numbers from a normal distribution diff --git a/scripts/dream.py b/scripts/dream.py index b1b9282ec0..a044962f3b 100755 --- a/scripts/dream.py +++ b/scripts/dream.py @@ -62,6 +62,7 @@ def main(): grid = opt.grid, # this is solely for recreating the prompt latent_diffusion_weights=opt.laion400m, + seamless=opt.seamless, embedding_path=opt.embedding_path, device_type=opt.device ) @@ -87,6 +88,9 @@ def main(): print(f'{e}. 
Aborting.') sys.exit(-1) + if opt.seamless: + print(">> changed to seamless tiling mode") + # preload the model tic = time.time() t2i.load_model() @@ -418,6 +422,11 @@ def create_argv_parser(): default='outputs/img-samples', help='Directory to save generated images and a log of prompts and seeds. Default: outputs/img-samples', ) + parser.add_argument( + '--seamless', + action='store_true', + help='Change the model to seamless tiling (circular) mode', + ) parser.add_argument( '--embedding_path', type=str, @@ -540,6 +549,11 @@ def create_cmd_parser(): default=None, help='Directory to save generated images and a log of prompts and seeds', ) + parser.add_argument( + '--seamless', + action='store_true', + help='Change the model to seamless tiling (circular) mode', + ) parser.add_argument( '-i', '--individual', diff --git a/static/dream_web/index.html b/static/dream_web/index.html index bf57afae3f..49a0fcccd4 100644 --- a/static/dream_web/index.html +++ b/static/dream_web/index.html @@ -37,6 +37,8 @@ + +
- +
@@ -74,7 +76,7 @@ - +
diff --git a/static/dream_web/index.js b/static/dream_web/index.js index cbd66366f4..ceaef32f09 100644 --- a/static/dream_web/index.js +++ b/static/dream_web/index.js @@ -19,7 +19,8 @@ function appendOutput(src, seed, config) { outputNode.addEventListener('click', () => { let form = document.querySelector("#generate-form"); for (const [k, v] of new FormData(form)) { - form.querySelector(`*[name=${k}]`).value = config[k]; + if (k == 'initimg') { continue; } + form.querySelector(`*[name=${k}]`).value = config[k]; } document.querySelector("#seed").value = seed; @@ -59,6 +60,7 @@ async function generateSubmit(form) { // Convert file data to base64 let formData = Object.fromEntries(new FormData(form)); + formData.initimg_name = formData.initimg.name formData.initimg = formData.initimg.name !== '' ? await toBase64(formData.initimg) : null; let strength = formData.strength;