mirror of https://github.com/invoke-ai/InvokeAI
synced 2024-08-30 20:32:17 +00:00

commit 3a2be621f3: Merge branch 'development' into main
@ -320,3 +320,20 @@ something that depends on it-- Rosetta can translate some Intel instructions but
not the specialized ones here. To avoid this, make sure to use the environment
variable `CONDA_SUBDIR=osx-arm64`, which restricts the Conda environment to only
use ARM packages, and use `nomkl` as described above.

### input types 'tensor<2x1280xf32>' and 'tensor<*xf16>' are not broadcast compatible

This error may appear just as generation starts, e.g.:

```
dream> clouds
Generating: 0%| | 0/1 [00:00<?, ?it/s]/Users/[...]/dev/stable-diffusion/ldm/modules/embedding_manager.py:152: UserWarning: The operator 'aten::nonzero' is not currently supported on the MPS backend and will fall back to run on the CPU. This may have performance implications. (Triggered internally at /Users/runner/work/_temp/anaconda/conda-bld/pytorch_1662016319283/work/aten/src/ATen/mps/MPSFallback.mm:11.)
  placeholder_idx = torch.where(
loc("mps_add"("(mpsFileLoc): /AppleInternal/Library/BuildRoots/20d6c351-ee94-11ec-bcaf-7247572f23b4/Library/Caches/com.apple.xbs/Sources/MetalPerformanceShadersGraph/mpsgraph/MetalPerformanceShadersGraph/Core/Files/MPSGraphUtilities.mm":219:0)): error: input types 'tensor<2x1280xf32>' and 'tensor<*xf16>' are not broadcast compatible
LLVM ERROR: Failed to infer result type(s).
Abort trap: 6
/Users/[...]/opt/anaconda3/envs/ldm/lib/python3.9/multiprocessing/resource_tracker.py:216: UserWarning: resource_tracker: There appear to be 1 leaked semaphore objects to clean up at shutdown
  warnings.warn('resource_tracker: There appear to be %d '
```

Macs do not support autocast/mixed precision. Supply `--full_precision` to use float32 everywhere.
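
As a quick illustration of the workaround (a sketch, not part of this commit; it assumes PyTorch 1.12+ with the MPS backend), the same float32 fallback can be expressed as:

```python
import torch

# MPS has no autocast support, so pin everything to float32 on Apple Silicon.
# This mirrors the T2I change later in this diff, which forces full precision
# whenever choose_torch_device() returns 'mps'.
device = 'mps' if torch.backends.mps.is_available() else 'cpu'
dtype = torch.float32 if device == 'mps' else torch.float16
x = torch.ones(2, 1280, device=device, dtype=dtype)
print(device, x.dtype)
```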
13 README.md
@ -138,6 +138,13 @@ You may also pass a -v<count> option to generate count variants on the original
passing the first generated image back into img2img the requested number of times. It generates interesting
variants.

## Seamless Tiling

The seamless tiling mode causes generated images to tile seamlessly with themselves. To use it, add the --seamless option when starting the script, which makes all generated images tile, or pass it with an individual dream> prompt as shown here:

```
dream> "pond garden with lotus by claude monet" --seamless -s100 -n4
```
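
Under the hood (as the `T2I` changes later in this commit show), seamless mode switches the model's convolution layers to circular padding, so opposite edges of the image wrap around to each other. A minimal sketch of the effect in plain PyTorch (illustrative only, not part of the commit):

```python
import torch
import torch.nn as nn

# With padding_mode='circular', a convolution at the right edge reads pixels
# from the left edge (and likewise top/bottom), so outputs tile seamlessly.
conv = nn.Conv2d(3, 8, kernel_size=3, padding=1, padding_mode='circular')
x = torch.randn(1, 3, 64, 64)
y = conv(x)
print(y.shape)  # torch.Size([1, 8, 64, 64]); spatial size preserved, edges wrap
```

Because every border pixel is computed as if the image continued on the opposite side, the finished image can be repeated edge to edge without visible seams.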

## GFPGAN and Real-ESRGAN Support

The script also provides the ability to do face restoration and

@ -400,7 +407,11 @@ repository and associated paper for details and limitations.

# Latest Changes

- v1.13 (3 September 2022)
- v1.14 (In progress)

- Add "seamless mode" for circular tiling of image. Generates beautiful effects. ([prixt](https://github.com/prixt))

- v1.13 (3 September 2022)

- Support image variations (see [VARIATIONS](VARIATIONS.md)) ([Kevin Gibbons](https://github.com/bakkot) and many contributors and reviewers)
- Supports a Google Colab notebook for a standalone server running on Google hardware [Arturo Mendivil](https://github.com/artmen1516)
@ -108,6 +108,6 @@ the chosen two images. Here's the one I like best:

<img src="static/variation_walkthru/000004.3747154981.png">

As you can see, this is a very powerful too, which when combined with
As you can see, this is a very powerful tool, which when combined with
subprompt weighting, gives you great control over the content and
quality of your generated images.
@ -1,33 +1,29 @@
name: ldm
channels:
  - pytorch-nightly
  - pytorch
  - conda-forge
dependencies:
  - python==3.9.13
  - python==3.10.5
  - pip==22.2.2

  # pytorch-nightly, left unpinned
  # pytorch left unpinned
  - pytorch
  - torchmetrics
  - torchvision

  # I suggest keeping the other deps sorted for convenience.
  # If you wish to upgrade to 3.10, try to run this:
  # To determine what the latest versions should be, run:
  #
  # ```shell
  # CONDA_CMD=conda
  # sed -E 's/python==3.9.13/python==3.10.5/;s/ldm/ldm-3.10/;21,99s/- ([^=]+)==.+/- \1/' environment-mac.yaml > /tmp/environment-mac-updated.yml
  # CONDA_SUBDIR=osx-arm64 $CONDA_CMD env create -f /tmp/environment-mac-updated.yml && $CONDA_CMD list -n ldm-3.10 | awk ' {print "    - " $1 "==" $2;} '
  # sed -E 's/ldm/ldm-updated/;20,99s/- ([^=]+)==.+/- \1/' environment-mac.yaml > environment-mac-updated.yml
  # CONDA_SUBDIR=osx-arm64 conda env create -f environment-mac-updated.yml && conda list -n ldm-updated | awk ' {print "    - " $1 "==" $2;} '
  # ```
  #
  # Unfortunately, as of 2022-08-31, this fails at the pip stage.
  - albumentations==1.2.1
  - coloredlogs==15.0.1
  - einops==0.4.1
  - grpcio==1.46.4
  - humanfriendly
  - imageio-ffmpeg==0.4.7
  - humanfriendly==10.0
  - imageio==2.21.2
  - imageio-ffmpeg==0.4.7
  - imgaug==0.4.0
  - kornia==0.6.7
  - mpmath==1.2.1
@ -43,13 +39,11 @@ dependencies:
  - streamlit==1.12.2
  - sympy==1.10.1
  - tensorboard==2.9.0
  - transformers==4.21.2
  - torchmetrics==0.9.3
  - pip:
    - invisible-watermark
    - test-tube
    - tokenizers
    - torch-fidelity
    - -e git+https://github.com/huggingface/diffusers.git@v0.2.4#egg=diffusers
    - test-tube==0.7.5
    - transformers==4.21.2
    - torch-fidelity==0.3.0
    - -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers
    - -e git+https://github.com/openai/CLIP.git@main#egg=clip
    - -e git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion
@ -59,6 +59,8 @@ class PromptFormatter:
        switches.append(f'-H{opt.height or t2i.height}')
        switches.append(f'-C{opt.cfg_scale or t2i.cfg_scale}')
        switches.append(f'-A{opt.sampler_name or t2i.sampler_name}')
        if opt.seamless or t2i.seamless:
            switches.append(f'--seamless')
        if opt.init_img:
            switches.append(f'-I{opt.init_img}')
        if opt.fit:
@ -76,7 +76,8 @@ class DreamServer(BaseHTTPRequestHandler):
        steps = int(post_data['steps'])
        width = int(post_data['width'])
        height = int(post_data['height'])
        fit = 'fit' in post_data
        seamless = 'seamless' in post_data
        cfgscale = float(post_data['cfgscale'])
        sampler_name = post_data['sampler']
        gfpgan_strength = float(post_data['gfpgan_strength']) if gfpgan_model_exists else 0
@ -92,7 +93,7 @@ class DreamServer(BaseHTTPRequestHandler):
        # across images generated by each call to prompt2img(), so we define it in
        # the outer scope of image_done()
        config = post_data.copy()  # shallow copy
        config['initimg'] = ''
        config['initimg'] = config.pop('initimg_name', '')
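        # keep just the uploaded file's name in the saved config, not the full base64 image payload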

        images_generated = 0  # helps keep track of when upscaling is started
        images_upscaled = 0  # helps keep track of when upscaling is completed
@ -170,6 +171,7 @@ class DreamServer(BaseHTTPRequestHandler):
                gfpgan_strength = gfpgan_strength,
                upscale = upscale,
                sampler_name = sampler_name,
                seamless = seamless,
                step_callback=image_progress,
                image_callback=image_done)
            else:
@ -191,6 +193,7 @@ class DreamServer(BaseHTTPRequestHandler):
                width = width,
                height = height,
                fit = fit,
                seamless = seamless,
                gfpgan_strength=gfpgan_strength,
                upscale = upscale,
                step_callback=image_progress,
@ -14,6 +14,7 @@ from PIL import Image
from tqdm import tqdm, trange
from itertools import islice
from einops import rearrange, repeat
from torch import nn
from torchvision.utils import make_grid
from pytorch_lightning import seed_everything
from torch import autocast
@ -109,6 +110,7 @@ class T2I:
            downsampling_factor
            precision
            strength
            seamless
            embedding_path

    The vast majority of these arguments default to reasonable values.
@ -132,6 +134,7 @@ class T2I:
        precision='autocast',
        full_precision=False,
        strength=0.75,  # default in scripts/img2img.py
        seamless=False,
        embedding_path=None,
        device_type = 'cuda',
        # just to keep track of this parameter when regenerating prompt
@ -153,6 +156,7 @@ class T2I:
        self.precision = precision
        self.full_precision = True if choose_torch_device() == 'mps' else full_precision
        self.strength = strength
        self.seamless = seamless
        self.embedding_path = embedding_path
        self.device_type = device_type
        self.model = None  # empty for now
@ -217,6 +221,7 @@ class T2I:
        step_callback = None,
        width = None,
        height = None,
        seamless = False,
        # these are specific to img2img
        init_img = None,
        fit = False,
@ -240,6 +245,7 @@ class T2I:
        width // width of image, in multiples of 64 (512)
        height // height of image, in multiples of 64 (512)
        cfg_scale // how strongly the prompt influences the image (7.5) (must be >1)
        seamless // whether the generated image should tile seamlessly
        init_img // path to an initial image - its dimensions override width and height
        strength // strength for noising/unnoising init_img. 0.0 preserves image exactly, 1.0 replaces it completely
        gfpgan_strength // strength for GFPGAN. 0.0 preserves image exactly, 1.0 replaces it completely
@ -268,6 +274,7 @@ class T2I:
        steps = steps or self.steps
        width = width or self.width
        height = height or self.height
        seamless = seamless or self.seamless
        cfg_scale = cfg_scale or self.cfg_scale
        ddim_eta = ddim_eta or self.ddim_eta
        iterations = iterations or self.iterations
@ -278,6 +285,10 @@ class T2I:
        model = (
            self.load_model()
        )  # will instantiate the model or return it from cache
        for m in model.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
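                # _orig_padding_mode is stashed on each conv in load_model(), so
                # seamless tiling can be switched on or off for every prompt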
                m.padding_mode = 'circular' if seamless else m._orig_padding_mode

        assert cfg_scale > 1.0, 'CFG_Scale (-C) must be >1.0'
        assert (
            0.0 <= strength <= 1.0
@ -324,7 +335,6 @@ class T2I:
                self.model.encode_first_stage(init_image)
            )  # move to latent space

            print(f' DEBUG: seed at make_image time ={seed}')
            make_image = self._img2img(
                prompt,
                steps=steps,
@ -413,10 +423,7 @@ class T2I:
                        f'>> Error running RealESRGAN - Your image was not upscaled.\n{e}'
                    )
                if image_callback is not None:
                    if save_original:
                        image_callback(image, seed)
                    else:
                        image_callback(image, seed, upscaled=True)
                    image_callback(image, seed, upscaled=True)
                else:  # no callback passed, so we simply replace old image with rescaled one
                    result[0] = image
@ -604,6 +611,10 @@ class T2I:

        self._set_sampler()

        for m in self.model.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
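                # remember each conv's original padding mode so that prompt2image()
                # can flip it to 'circular' and back when toggling seamless mode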
                m._orig_padding_mode = m.padding_mode

        return self.model

    # returns a tensor filled with random numbers from a normal distribution
@ -62,6 +62,7 @@ def main():
        grid = opt.grid,
        # this is solely for recreating the prompt
        latent_diffusion_weights=opt.laion400m,
        seamless=opt.seamless,
        embedding_path=opt.embedding_path,
        device_type=opt.device
    )
@ -87,6 +88,9 @@ def main():
        print(f'{e}. Aborting.')
        sys.exit(-1)

    if opt.seamless:
        print(">> changed to seamless tiling mode")

    # preload the model
    tic = time.time()
    t2i.load_model()
@ -418,6 +422,11 @@ def create_argv_parser():
        default='outputs/img-samples',
        help='Directory to save generated images and a log of prompts and seeds. Default: outputs/img-samples',
    )
    parser.add_argument(
        '--seamless',
        action='store_true',
        help='Change the model to seamless tiling (circular) mode',
    )
    parser.add_argument(
        '--embedding_path',
        type=str,
@ -540,6 +549,11 @@ def create_cmd_parser():
        default=None,
        help='Directory to save generated images and a log of prompts and seeds',
    )
    parser.add_argument(
        '--seamless',
        action='store_true',
        help='Change the model to seamless tiling (circular) mode',
    )
    parser.add_argument(
        '-i',
        '--individual',
@ -37,6 +37,8 @@
        <option value="k_euler_a">KEULER_A</option>
        <option value="k_heun">KHEUN</option>
      </select>
      <input type="checkbox" name="seamless" id="seamless">
      <label for="seamless">Seamless circular tiling</label>
      <br>
      <label title="Set to multiple of 64" for="width">Width:</label>
      <select id="width" name="width" value="512">
@ -64,7 +66,7 @@
      <input value="-1" type="number" id="seed" name="seed">
      <button type="button" id="reset-seed">↺</button>
      <input type="checkbox" name="progress_images" id="progress_images">
      <label for="progress_images">Display in-progress images (slows down generation):</label>
      <label for="progress_images">Display in-progress images (slower)</label>
      <button type="button" id="reset-all">Reset to Defaults</button>
    </div>
    <div id="img2img">
@ -74,7 +76,7 @@
      <label for="strength">Img2Img Strength:</label>
      <input value="0.75" type="number" id="strength" name="strength" step="0.01" min="0" max="1">
      <input type="checkbox" id="fit" name="fit" checked>
      <label title="Rescale image to fit within requested width and height" for="fit">Fit to width/height:</label>
      <label title="Rescale image to fit within requested width and height" for="fit">Fit to width/height</label>
    </div>
    <div id="gfpgan">
      <label title="Strength of the gfpgan (face fixing) algorithm." for="gfpgan_strength">GFPGAN Strength (0 to disable):</label>
@ -19,7 +19,8 @@ function appendOutput(src, seed, config) {
    outputNode.addEventListener('click', () => {
        let form = document.querySelector("#generate-form");
        for (const [k, v] of new FormData(form)) {
            form.querySelector(`*[name=${k}]`).value = config[k];
            if (k == 'initimg') { continue; }
            form.querySelector(`*[name=${k}]`).value = config[k];
        }
        document.querySelector("#seed").value = seed;

@ -59,6 +60,7 @@ async function generateSubmit(form) {

    // Convert file data to base64
    let formData = Object.fromEntries(new FormData(form));
    formData.initimg_name = formData.initimg.name
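    // keep the filename separately; the server stores it in the saved config instead of the base64 payload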
    formData.initimg = formData.initimg.name !== '' ? await toBase64(formData.initimg) : null;

    let strength = formData.strength;