Merge branch 'development' into main

2024-08-30 20:32:17 +00:00 · 2022-09-04 08:15:51 -04:00 · 2022-09-04 08:15:51 -04:00 · 3a2be621f3
commit 3a2be621f3
parent 91e826e5f4 5116c8178c
10 changed files with 86 additions and 30 deletions
--- a/README-Mac-MPS.md
+++ b/README-Mac-MPS.md
@ -320,3 +320,20 @@ something that depends on it-- Rosetta can translate some Intel instructions but
 not the specialized ones here. To avoid this, make sure to use the environment
 variable `CONDA_SUBDIR=osx-arm64`, which restricts the Conda environment to only
 use ARM packages, and use `nomkl` as described above.
 ### input types 'tensor<2x1280xf32>' and 'tensor<*xf16>' are not broadcast compatible
 May appear when just starting to generate, e.g.:
 ```
 dream> clouds
 Generating:   0%|                                                              | 0/1 [00:00<?, ?it/s]/Users/[...]/dev/stable-diffusion/ldm/modules/embedding_manager.py:152: UserWarning: The operator 'aten::nonzero' is not currently supported on the MPS backend and will fall back to run on the CPU. This may have performance implications. (Triggered internally at /Users/runner/work/_temp/anaconda/conda-bld/pytorch_1662016319283/work/aten/src/ATen/mps/MPSFallback.mm:11.)
  placeholder_idx = torch.where(
                                                                                                    loc("mps_add"("(mpsFileLoc): /AppleInternal/Library/BuildRoots/20d6c351-ee94-11ec-bcaf-7247572f23b4/Library/Caches/com.apple.xbs/Sources/MetalPerformanceShadersGraph/mpsgraph/MetalPerformanceShadersGraph/Core/Files/MPSGraphUtilities.mm":219:0)): error: input types 'tensor<2x1280xf32>' and 'tensor<*xf16>' are not broadcast compatible
 LLVM ERROR: Failed to infer result type(s).
 Abort trap: 6
 /Users/[...]/opt/anaconda3/envs/ldm/lib/python3.9/multiprocessing/resource_tracker.py:216: UserWarning: resource_tracker: There appear to be 1 leaked semaphore objects to clean up at shutdown
  warnings.warn('resource_tracker: There appear to be %d '
  ```
 Macs do not support autocast/mixed-precision. Supply `--full_precision` to use float32 everywhere.
--- a/README.md
+++ b/README.md
@ -138,6 +138,13 @@ You may also pass a -v<count> option to generate count variants on the original
 passing the first generated image back into img2img the requested number of times. It generates interesting
 variants.
 ## Seamless Tiling
 The seamless tiling mode causes generated images to tile seamlessly with themselves. To use it, add the --seamless option when starting the script, which makes all generated images tile, or add it to an individual dream> prompt as shown here:
 ```
 dream> "pond garden with lotus by claude monet" --seamless -s100 -n4
 ```
 ## GFPGAN and Real-ESRGAN Support
 The script also provides the ability to do face restoration and
@ -400,7 +407,11 @@ repository and associated paper for details and limitations.
 # Latest Changes
- v1.13 (3 September 2022)
+- v1.14 (In progress)
  - Add "seamless mode" for circular tiling of image. Generates beautiful effects. ([prixt](https://github.com/prixt))
 - v1.13 (3 September 2022)
  - Support image variations (see [VARIATIONS](VARIATIONS.md)) ([Kevin Gibbons](https://github.com/bakkot) and many contributors and reviewers)
  - Supports a Google Colab notebook for a standalone server running on Google hardware [Arturo Mendivil](https://github.com/artmen1516)
--- a/VARIATIONS.md
+++ b/VARIATIONS.md
@ -108,6 +108,6 @@ the chosen two images. Here's the one I like best:
 <img src="static/variation_walkthru/000004.3747154981.png">
-As you can see, this is a very powerful too, which when combined with
+As you can see, this is a very powerful tool, which when combined with
 subprompt weighting, gives you great control over the content and
 quality of your generated images.
--- a/environment-mac.yaml
+++ b/environment-mac.yaml
@ -1,33 +1,29 @@
 name: ldm
 channels:
-  - pytorch-nightly
+  - pytorch
  - conda-forge
 dependencies:
-  - python==3.9.13
+  - python==3.10.5
  - pip==22.2.2
-  # pytorch-nightly, left unpinned
+  # pytorch left unpinned
  - pytorch
  - torchmetrics
  - torchvision
  # I suggest to keep the other deps sorted for convenience.
-  # If you wish to upgrade to 3.10, try to run this:
+  # To determine what the latest versions should be, run:
  #
  # ```shell
-  # CONDA_CMD=conda
+  # sed -E 's/ldm/ldm-updated/;20,99s/- ([^=]+)==.+/- \1/' environment-mac.yaml > environment-mac-updated.yml
-  # sed -E 's/python==3.9.13/python==3.10.5/;s/ldm/ldm-3.10/;21,99s/- ([^=]+)==.+/- \1/' environment-mac.yaml > /tmp/environment-mac-updated.yml
+  # CONDA_SUBDIR=osx-arm64 conda env create -f environment-mac-updated.yml && conda list -n ldm-updated | awk ' {print "  - " $1 "==" $2;} '
  # CONDA_SUBDIR=osx-arm64 $CONDA_CMD env create -f /tmp/environment-mac-updated.yml && $CONDA_CMD list -n ldm-3.10 | awk ' {print "  - " $1 "==" $2;} '
  # ```
  #
  # Unfortunately, as of 2022-08-31, this fails at the pip stage.
  - albumentations==1.2.1
  - coloredlogs==15.0.1
  - einops==0.4.1
  - grpcio==1.46.4
-  - humanfriendly
+  - humanfriendly==10.0
  - imageio-ffmpeg==0.4.7
  - imageio==2.21.2
  - imageio-ffmpeg==0.4.7
  - imgaug==0.4.0
  - kornia==0.6.7
  - mpmath==1.2.1
@ -43,13 +39,11 @@ dependencies:
  - streamlit==1.12.2
  - sympy==1.10.1
  - tensorboard==2.9.0
-  - transformers==4.21.2
+  - torchmetrics==0.9.3
  - pip:
-    - invisible-watermark
+    - test-tube==0.7.5
-    - test-tube
+    - transformers==4.21.2
-    - tokenizers
+    - torch-fidelity==0.3.0
    - torch-fidelity
    - -e git+https://github.com/huggingface/diffusers.git@v0.2.4#egg=diffusers
    - -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers
    - -e git+https://github.com/openai/CLIP.git@main#egg=clip
    - -e git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion
--- a/ldm/dream/pngwriter.py
+++ b/ldm/dream/pngwriter.py
@ -59,6 +59,8 @@ class PromptFormatter:
        switches.append(f'-H{opt.height       or t2i.height}')
        switches.append(f'-C{opt.cfg_scale    or t2i.cfg_scale}')
        switches.append(f'-A{opt.sampler_name or t2i.sampler_name}')
        if opt.seamless or t2i.seamless:
            switches.append(f'--seamless')
        if opt.init_img:
            switches.append(f'-I{opt.init_img}')
        if opt.fit:
--- a/ldm/dream/server.py
+++ b/ldm/dream/server.py
@ -76,7 +76,8 @@ class DreamServer(BaseHTTPRequestHandler):
        steps = int(post_data['steps'])
        width = int(post_data['width'])
        height = int(post_data['height'])
-        fit    = 'fit' in post_data
+        fit      = 'fit' in post_data
        seamless = 'seamless' in post_data
        cfgscale = float(post_data['cfgscale'])
        sampler_name  = post_data['sampler']
        gfpgan_strength = float(post_data['gfpgan_strength']) if gfpgan_model_exists else 0
@ -92,7 +93,7 @@ class DreamServer(BaseHTTPRequestHandler):
        # across images generated by each call to prompt2img(), so we define it in
        # the outer scope of image_done()
        config = post_data.copy() # Shallow copy
-        config['initimg'] = ''
+        config['initimg'] = config.pop('initimg_name','')
        images_generated = 0    # helps keep track of when upscaling is started
        images_upscaled = 0     # helps keep track of when upscaling is completed
@ -170,6 +171,7 @@ class DreamServer(BaseHTTPRequestHandler):
                                        gfpgan_strength = gfpgan_strength,
                                        upscale         = upscale,
                                        sampler_name    = sampler_name,
                                        seamless        = seamless,
                                        step_callback=image_progress,
                                        image_callback=image_done)
            else:
@ -191,6 +193,7 @@ class DreamServer(BaseHTTPRequestHandler):
                                            width      = width,
                                            height     = height,
                                            fit        = fit,
                                            seamless   = seamless,
                                            gfpgan_strength=gfpgan_strength,
                                            upscale         = upscale,
                                            step_callback=image_progress,
--- a/ldm/simplet2i.py
+++ b/ldm/simplet2i.py
@ -14,6 +14,7 @@ from PIL import Image
 from tqdm import tqdm, trange
 from itertools import islice
 from einops import rearrange, repeat
 from torch import nn
 from torchvision.utils import make_grid
 from pytorch_lightning import seed_everything
 from torch import autocast
@ -109,6 +110,7 @@ class T2I:
        downsampling_factor
        precision
        strength
        seamless
        embedding_path
    The vast majority of these arguments default to reasonable values.
@ -132,6 +134,7 @@ class T2I:
            precision='autocast',
            full_precision=False,
            strength=0.75,  # default in scripts/img2img.py
            seamless=False,
            embedding_path=None,
            device_type = 'cuda',
            # just to keep track of this parameter when regenerating prompt
@ -153,6 +156,7 @@ class T2I:
        self.precision                = precision
        self.full_precision           = True if choose_torch_device() == 'mps' else full_precision
        self.strength                 = strength
        self.seamless                 = seamless
        self.embedding_path           = embedding_path
        self.device_type              = device_type
        self.model                    = None     # empty for now
@ -217,6 +221,7 @@ class T2I:
            step_callback  =    None,
            width          =    None,
            height         =    None,
            seamless       =    False,
            # these are specific to img2img
            init_img       =    None,
            fit            =    False,
@ -240,6 +245,7 @@ class T2I:
           width                           // width of image, in multiples of 64 (512)
           height                          // height of image, in multiples of 64 (512)
           cfg_scale                       // how strongly the prompt influences the image (7.5) (must be >1)
           seamless                        // whether the generated image should tile
           init_img                        // path to an initial image - its dimensions override width and height
           strength                        // strength for noising/unnoising init_img. 0.0 preserves image exactly, 1.0 replaces it completely
           gfpgan_strength                 // strength for GFPGAN. 0.0 preserves image exactly, 1.0 replaces it completely
@ -268,6 +274,7 @@ class T2I:
        steps                 = steps      or self.steps
        width                 = width      or self.width
        height                = height     or self.height
        seamless              = seamless   or self.seamless
        cfg_scale             = cfg_scale  or self.cfg_scale
        ddim_eta              = ddim_eta   or self.ddim_eta
        iterations            = iterations or self.iterations
@ -278,6 +285,10 @@ class T2I:
        model = (
            self.load_model()
        )  # will instantiate the model or return it from cache
        for m in model.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
                m.padding_mode = 'circular' if seamless else m._orig_padding_mode
        assert cfg_scale > 1.0, 'CFG_Scale (-C) must be >1.0'
        assert (
            0.0 <= strength <= 1.0
@ -324,7 +335,6 @@ class T2I:
                        self.model.encode_first_stage(init_image)
                    ) # move to latent space
                print(f' DEBUG: seed at make_image time ={seed}')
                make_image = self._img2img(
                    prompt,
                    steps=steps,
@ -413,10 +423,7 @@ class T2I:
                                f'>> Error running RealESRGAN - Your image was not upscaled.\n{e}'
                            )
                        if image_callback is not None:
-                            if save_original:
+                            image_callback(image, seed, upscaled=True)
                                image_callback(image, seed)
                            else:
                                image_callback(image, seed, upscaled=True)
                        else:  # no callback passed, so we simply replace old image with rescaled one
                            result[0] = image
@ -604,6 +611,10 @@ class T2I:
            self._set_sampler()
            for m in self.model.modules():
                if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
                    m._orig_padding_mode = m.padding_mode
        return self.model
    # returns a tensor filled with random numbers from a normal distribution
--- a/scripts/dream.py
+++ b/scripts/dream.py
@ -62,6 +62,7 @@ def main():
        grid  = opt.grid,
        # this is solely for recreating the prompt
        latent_diffusion_weights=opt.laion400m,
        seamless=opt.seamless,
        embedding_path=opt.embedding_path,
        device_type=opt.device
    )
@ -87,6 +88,9 @@ def main():
            print(f'{e}. Aborting.')
            sys.exit(-1)
    if opt.seamless:
        print(">> changed to seamless tiling mode")
    # preload the model
    tic = time.time()
    t2i.load_model()
@ -418,6 +422,11 @@ def create_argv_parser():
        default='outputs/img-samples',
        help='Directory to save generated images and a log of prompts and seeds. Default: outputs/img-samples',
    )
    parser.add_argument(
        '--seamless',
        action='store_true',
        help='Change the model to seamless tiling (circular) mode',
    )
    parser.add_argument(
        '--embedding_path',
        type=str,
@ -540,6 +549,11 @@ def create_cmd_parser():
        default=None,
        help='Directory to save generated images and a log of prompts and seeds',
    )
    parser.add_argument(
        '--seamless',
        action='store_true',
        help='Change the model to seamless tiling (circular) mode',
    )
    parser.add_argument(
        '-i',
        '--individual',
--- a/static/dream_web/index.html
+++ b/static/dream_web/index.html
@ -37,6 +37,8 @@
 	      <option value="k_euler_a">KEULER_A</option>
              <option value="k_heun">KHEUN</option>
            </select>
            <input type="checkbox" name="seamless" id="seamless">
 	    <label for="seamless">Seamless circular tiling</label>
            <br>
            <label title="Set to multiple of 64" for="width">Width:</label>
            <select id="width" name="width" value="512">
@ -64,7 +66,7 @@
            <input value="-1" type="number" id="seed" name="seed">
            <button type="button" id="reset-seed">&olarr;</button>
            <input type="checkbox" name="progress_images" id="progress_images">
-	    <label for="progress_images">Display in-progress images (slows down generation):</label>
+	    <label for="progress_images">Display in-progress images (slower)</label>
 	    <button type="button" id="reset-all">Reset to Defaults</button>
 	</div>
 	<div id="img2img">
@ -74,7 +76,7 @@
          <label for="strength">Img2Img Strength:</label>
          <input value="0.75" type="number" id="strength" name="strength" step="0.01" min="0" max="1">
          <input type="checkbox" id="fit" name="fit" checked>
-          <label title="Rescale image to fit within requested width and height" for="fit">Fit to width/height:</label>
+          <label title="Rescale image to fit within requested width and height" for="fit">Fit to width/height</label>
 	</div>
        <div id="gfpgan">
          <label title="Strength of the gfpgan (face fixing) algorithm." for="gfpgan_strength">GPFGAN Strength (0 to disable):</label>
--- a/static/dream_web/index.js
+++ b/static/dream_web/index.js
@ -19,7 +19,8 @@ function appendOutput(src, seed, config) {
    outputNode.addEventListener('click', () => {
        let form = document.querySelector("#generate-form");
        for (const [k, v] of new FormData(form)) {
-            form.querySelector(`*[name=${k}]`).value = config[k];
+	    if (k == 'initimg') { continue; }
 	    form.querySelector(`*[name=${k}]`).value = config[k];
        }
        document.querySelector("#seed").value = seed;
@ -59,6 +60,7 @@ async function generateSubmit(form) {
    // Convert file data to base64
    let formData = Object.fromEntries(new FormData(form));
    formData.initimg_name = formData.initimg.name
    formData.initimg = formData.initimg.name !== '' ? await toBase64(formData.initimg) : null;
    let strength = formData.strength;