Mirror of https://github.com/invoke-ai/InvokeAI (synced 2024-08-30 20:32:17 +00:00)

Commit c98ade9b25: Merge branch 'development' into development
@@ -105,10 +105,11 @@ To run in full-precision mode, start `dream.py` with the `--full_precision` flag
 - [Seamless Tiling](docs/features/OTHER.md#seamless-tiling)
 - [Google Colab](docs/features/OTHER.md#google-colab)
 - [Web Server](docs/features/WEB.md)
-- [Reading Prompts From File](docs/features/OTHER.md#reading-prompts-from-a-file)
+- [Reading Prompts From File](docs/features/PROMPTS.md#reading-prompts-from-a-file)
 - [Shortcut: Reusing Seeds](docs/features/OTHER.md#shortcuts-reusing-seeds)
-- [Weighted Prompts](docs/features/OTHER.md#weighted-prompts)
+- [Weighted Prompts](docs/features/PROMPTS.md#weighted-prompts)
 - [Thresholding and Perlin Noise Initialization Options](/docs/features/OTHER.md#thresholding-and-perlin-noise-initialization-options)
+- [Negative/Unconditioned Prompts](docs/features/PROMPTS.md#negative-and-unconditioned-prompts)
 - [Variations](docs/features/VARIATIONS.md)
 - [Personalizing Text-to-Image Generation](docs/features/TEXTUAL_INVERSION.md)
 - [Simplified API for text to image generation](docs/features/OTHER.md#simplified-api)
@@ -40,6 +40,8 @@ def parameters_to_command(params):
         switches.append(f'-I {params["init_img"]}')
     if 'init_mask' in params and len(params['init_mask']) > 0:
         switches.append(f'-M {params["init_mask"]}')
+    if 'init_color' in params and len(params['init_color']) > 0:
+        switches.append(f'--init_color {params["init_color"]}')
     if 'strength' in params and 'init_img' in params:
         switches.append(f'-f {params["strength"]}')
     if 'fit' in params and params["fit"] == True:

@@ -128,6 +130,11 @@ def create_cmd_parser():
         type=str,
         help='Path to input mask for inpainting mode (supersedes width and height)',
     )
+    parser.add_argument(
+        '--init_color',
+        type=str,
+        help='Path to reference image for color correction (used for repeated img2img and inpainting)'
+    )
     parser.add_argument(
         '-T',
         '-fit',
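To see what the new `--init_color` plumbing produces, here is a self-contained sketch that replays the switch-assembly logic from the hunks above against a hypothetical params dict (file names are made up):

```python
# Hypothetical params dict exercising only the keys visible in the hunk above.
params = {
    'init_img':   'photo.png',   # file names are illustrative
    'init_mask':  'mask.png',
    'init_color': 'photo.png',
    'strength':   0.75,
    'fit':        True,
}

# Reproducing the switch-assembly logic shown in the hunk:
switches = []
switches.append(f'-I {params["init_img"]}')
if 'init_mask' in params and len(params['init_mask']) > 0:
    switches.append(f'-M {params["init_mask"]}')
if 'init_color' in params and len(params['init_color']) > 0:
    switches.append(f'--init_color {params["init_color"]}')
if 'strength' in params and 'init_img' in params:
    switches.append(f'-f {params["strength"]}')
if 'fit' in params and params['fit'] == True:
    switches.append('--fit')

print(' '.join(switches))
# -I photo.png -M mask.png --init_color photo.png -f 0.75 --fit
```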
BIN  docs/assets/negative_prompt_walkthru/step1.png  (new file, 451 KiB, binary file not shown)
BIN  docs/assets/negative_prompt_walkthru/step2.png  (new file, 453 KiB, binary file not shown)
BIN  docs/assets/negative_prompt_walkthru/step3.png  (new file, 463 KiB, binary file not shown)
BIN  docs/assets/negative_prompt_walkthru/step4.png  (new file, 435 KiB, binary file not shown)
@@ -154,13 +154,19 @@ vary greatly depending on what is in the image. We also ask to --fit the image i
 than 640x480. Otherwise the image size will be identical to the provided photo and you may run out
 of memory if it is large.
 
+Repeated chaining of img2img on an image can result in significant color shifts
+in the output, especially if run with lower strength. Color correction can be
+run against a reference image to fix this issue. Use the original input image to the
+chain as the reference image for each step in the chain.
+
 In addition to the command-line options recognized by txt2img, img2img accepts additional options:
 
 | Argument            | Shortcut  | Default | Description |
 | ------------------- | --------- | ------- | ----------- |
-| --init_img <path>   | -I<path>  | None    | Path to the initialization image |
-| --fit               | -F        | False   | Scale the image to fit into the specified -H and -W dimensions |
-| --strength <float>  | -s<float> | 0.75    | How hard to try to match the prompt to the initial image. Ranges from 0.0-0.99, with higher values replacing the initial image completely. |
+| --init_img <path>   | -I<path>  | None    | Path to the initialization image |
+| --init_color <path> |           | None    | Path to reference image for color correction |
+| --fit               | -F        | False   | Scale the image to fit into the specified -H and -W dimensions |
+| --strength <float>  | -s<float> | 0.75    | How hard to try to match the prompt to the initial image. Ranges from 0.0-0.99, with higher values replacing the initial image completely. |
 
 ### This is an example of inpainting
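The chaining workflow described in the hunk above is easiest to see end to end. A minimal sketch, assuming a hypothetical starting photo `photo.png` and illustrative output file names; note that every step passes the original image to `--init_color`, not the previous output:

```bash
dream> "a watercolor painting of my garden" -I photo.png --init_color photo.png -f 0.5
dream> "a watercolor painting of my garden" -I outputs/000001.png --init_color photo.png -f 0.5
dream> "a watercolor painting of my garden" -I outputs/000002.png --init_color photo.png -f 0.5
```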
@@ -28,32 +28,6 @@ dream> "pond garden with lotus by claude monet" --seamless -s100 -n4
 
 ---
 
-## **Reading Prompts from a File**
-
-You can automate `dream.py` by providing a text file with the prompts you want to run, one line per
-prompt. The text file must be composed with a text editor (e.g. Notepad) and not a word processor.
-Each line should look like what you would type at the dream> prompt:
-
-```bash
-a beautiful sunny day in the park, children playing -n4 -C10
-stormy weather on a mountain top, goats grazing -s100
-innovative packaging for a squid's dinner -S137038382
-```
-
-Then pass this file's name to `dream.py` when you invoke it:
-
-```bash
-(ldm) ~/stable-diffusion$ python3 scripts/dream.py --from_file "path/to/prompts.txt"
-```
-
-You may read a series of prompts from standard input by providing a filename of `-`:
-
-```bash
-(ldm) ~/stable-diffusion$ echo "a beautiful day" | python3 scripts/dream.py --from_file -
-```
-
----
-
 ## **Shortcuts: Reusing Seeds**
 
 Since it is so common to reuse seeds while refining a prompt, there is now a shortcut as of version
docs/features/PROMPTS.md  (new file, 96 lines)
@@ -0,0 +1,96 @@
# Prompting Features

## **Reading Prompts from a File**

You can automate `dream.py` by providing a text file with the prompts you want to run, one line per
prompt. The text file must be composed with a text editor (e.g. Notepad) and not a word processor.
Each line should look like what you would type at the dream> prompt:

```bash
a beautiful sunny day in the park, children playing -n4 -C10
stormy weather on a mountain top, goats grazing -s100
innovative packaging for a squid's dinner -S137038382
```

Then pass this file's name to `dream.py` when you invoke it:

```bash
(ldm) ~/stable-diffusion$ python3 scripts/dream.py --from_file "path/to/prompts.txt"
```

You may read a series of prompts from standard input by providing a filename of `-`:

```bash
(ldm) ~/stable-diffusion$ echo "a beautiful day" | python3 scripts/dream.py --from_file -
```

---

## **Weighted Prompts**

You may weight different sections of the prompt to tell the sampler to attach different levels of
priority to them, by adding `:(number)` to the end of the section you wish to up- or downweight. For
example, consider this prompt:

```bash
tabby cat:0.25 white duck:0.75 hybrid
```

This will tell the sampler to invest 25% of its effort on the tabby cat aspect of the image and 75%
on the white duck aspect (surprisingly, this example actually works). The prompt weights can use any
combination of integers and floating point numbers, and they do not need to add up to 1.
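To make the weighting syntax concrete, here is a minimal Python sketch of one way to split a prompt into `(text, weight)` pairs. It is illustrative only, not the parser `dream.py` actually uses, and it assumes unweighted trailing text defaults to a weight of 1.0:

```python
import re

def parse_weighted_prompt(prompt: str, default_weight: float = 1.0):
    """Split 'tabby cat:0.25 white duck:0.75 hybrid' into (text, weight) pairs.
    Illustrative sketch only, not dream.py's real parser."""
    pairs = []
    remaining = prompt
    while remaining:
        match = re.search(r':\s*([0-9]*\.?[0-9]+)', remaining)
        if match is None:
            # no more ':number' markers; the rest is an unweighted section
            pairs.append((remaining.strip(), default_weight))
            break
        pairs.append((remaining[:match.start()].strip(), float(match.group(1))))
        remaining = remaining[match.end():]
    return pairs

print(parse_weighted_prompt("tabby cat:0.25 white duck:0.75 hybrid"))
# [('tabby cat', 0.25), ('white duck', 0.75), ('hybrid', 1.0)]
```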
---

## **Negative and Unconditioned Prompts**

Stable Diffusion will try to ignore any words enclosed in a pair of square brackets when
generating images.

```bash
this is a test prompt [not really] to make you understand [cool] how this works.
```

In the prompt above, the words `not really` and `cool` will be ignored by Stable Diffusion.
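As a rough illustration of the bracket syntax, this small Python sketch separates bracketed negative terms from the rest of the prompt. It is a toy, not the model's actual conditioning code:

```python
import re

def split_negative_prompt(prompt: str):
    """Separate [bracketed] negative sections from the positive prompt.
    Illustrative toy only."""
    negatives = re.findall(r'\[([^\]]*)\]', prompt)      # text inside [...]
    positive = re.sub(r'\[[^\]]*\]', '', prompt)         # prompt with [...] removed
    positive = re.sub(r'\s{2,}', ' ', positive).strip()  # tidy leftover spacing
    return positive, ' '.join(negatives)

pos, neg = split_negative_prompt(
    "this is a test prompt [not really] to make you understand [cool] how this works."
)
print(pos)  # this is a test prompt to make you understand how this works.
print(neg)  # not really cool
```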
Here's a walkthrough that shows the feature in action.

Original prompt:

```bash
"A fantastical translucent poney made of water and foam, ethereal, radiant, hyperalism, scottish folklore, digital painting, artstation, concept art, smooth, 8 k frostbite 3 engine, ultra detailed, art by artgerm and greg rutkowski and magali villeneuve" -s 20 -W 512 -H 768 -C 7.5 -A k_euler_a -S 1654590180
```

![step1](../assets/negative_prompt_walkthru/step1.png)

That image has a woman, so if we want the horse without a rider, we can influence the image not to have a woman by putting [woman] in the prompt, like this:

```bash
"A fantastical translucent poney made of water and foam, ethereal, radiant, hyperalism, scottish folklore, digital painting, artstation, concept art, smooth, 8 k frostbite 3 engine, ultra detailed, art by artgerm and greg rutkowski and magali villeneuve [woman]" -s 20 -W 512 -H 768 -C 7.5 -A k_euler_a -S 1654590180
```

![step2](../assets/negative_prompt_walkthru/step2.png)

That's nice - but say we also don't want the image to be quite so blue. We can add "blue" to the list of negative prompts, so it's now [woman blue]:

```bash
"A fantastical translucent poney made of water and foam, ethereal, radiant, hyperalism, scottish folklore, digital painting, artstation, concept art, smooth, 8 k frostbite 3 engine, ultra detailed, art by artgerm and greg rutkowski and magali villeneuve [woman blue]" -s 20 -W 512 -H 768 -C 7.5 -A k_euler_a -S 1654590180
```

![step3](../assets/negative_prompt_walkthru/step3.png)

Getting close - but there's no sense in having a saddle when our horse doesn't have a rider, so we'll add one more negative prompt: [woman blue saddle].

```bash
"A fantastical translucent poney made of water and foam, ethereal, radiant, hyperalism, scottish folklore, digital painting, artstation, concept art, smooth, 8 k frostbite 3 engine, ultra detailed, art by artgerm and greg rutkowski and magali villeneuve [woman blue saddle]" -s 20 -W 512 -H 768 -C 7.5 -A k_euler_a -S 1654590180
```

![step4](../assets/negative_prompt_walkthru/step4.png)

Notes about this feature:

* The only requirement for words to be ignored is that they are enclosed in a pair of square brackets.
* You can provide multiple words within the same bracket.
* You can provide multiple bracketed groups in different places of your prompt; that works just fine.
* To improve typical anatomy problems, you can add negative prompts like [bad anatomy, extra legs, extra arms, extra fingers, poorly drawn hands, poorly drawn feet, disfigured, out of frame, tiling, bad art, deformed, mutated].
@@ -102,6 +102,7 @@ generate more variations around the almost-but-not-quite image. We do the
 latter, using both the `-V` (combining) and `-v` (variation strength) options.
 Note that we use `-n6` to generate 6 variations:
 
 ```bash
 dream> "prompt" -S3357757885 -V3647897225,0.1,1614299449,0.1 -v0.05 -n6
 Outputs:
 ./outputs/Xena/000004.3279757577.png: "prompt" -s50 -W512 -H512 -C7.5 -Ak_lms -V 3647897225:0.1,1614299449:0.1,3279757577:0.05 -S3357757885
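For reference, the `-V` value shown above is a flat comma-separated list of alternating seeds and weights. A hedged sketch of how such a value could be decoded (illustrative, not the repository's actual parser):

```python
def parse_variation_pairs(spec: str):
    """Decode a -V argument like '3647897225,0.1,1614299449,0.1'
    into (seed, weight) pairs. Illustrative sketch only."""
    parts = spec.split(',')
    assert len(parts) % 2 == 0, 'expected alternating seed,weight values'
    return [(int(parts[i]), float(parts[i + 1])) for i in range(0, len(parts), 2)]

print(parse_variation_pairs('3647897225,0.1,1614299449,0.1'))
# [(3647897225, 0.1), (1614299449, 0.1)]
```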
@@ -181,6 +181,10 @@ class Args(object):
             switches.append('--seamless')
         if a['init_img'] and len(a['init_img'])>0:
             switches.append(f'-I {a["init_img"]}')
         if a['init_mask'] and len(a['init_mask'])>0:
             switches.append(f'-M {a["init_mask"]}')
+        if a['init_color'] and len(a['init_color'])>0:
+            switches.append(f'--init_color {a["init_color"]}')
         if a['fit']:
             switches.append(f'--fit')
         if a['init_img'] and a['strength'] and a['strength']>0:

@@ -493,6 +497,11 @@ class Args(object):
         type=str,
         help='Path to input mask for inpainting mode (supersedes width and height)',
     )
+    img2img_group.add_argument(
+        '--init_color',
+        type=str,
+        help='Path to reference image for color correction (used for repeated img2img and inpainting)'
+    )
     img2img_group.add_argument(
         '-T',
         '-fit',
@@ -22,7 +22,8 @@ class Completer:
     def complete(self, text, state):
         buffer = readline.get_line_buffer()
 
-        if text.startswith(('-I', '--init_img','-M','--init_mask')):
+        if text.startswith(('-I', '--init_img','-M','--init_mask',
+                            '--init_color')):
             return self._path_completions(text, state, ('.png','.jpg','.jpeg'))
 
         if buffer.strip().endswith('cd') or text.startswith(('.', '/')):

@@ -57,6 +58,8 @@ class Completer:
             path = text.replace('--init_mask=', '', 1).lstrip()
         elif text.startswith('-M'):
             path = text.replace('-M', '', 1).lstrip()
+        elif text.startswith('--init_color='):
+            path = text.replace('--init_color=', '', 1).lstrip()
         else:
             path = text

@@ -100,6 +103,7 @@ if readline_available:
         '--individual','-i',
         '--init_img','-I',
         '--init_mask','-M',
+        '--init_color',
         '--strength','-f',
         '--variants','-v',
         '--outdir','-o',
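For readers unfamiliar with the `readline` completion protocol used above: `complete(text, state)` is called repeatedly with `state` = 0, 1, 2, ... and returns one candidate per call, or `None` when exhausted. A minimal self-contained sketch of the same pattern (hypothetical option list; not this repository's registration code):

```python
import readline

class FlagCompleter:
    """Toy completer that offers matching flags, mirroring how the
    Completer above offers path completions for -I/-M/--init_color."""
    def __init__(self, options):
        self.options = options
        self.matches = []

    def complete(self, text, state):
        if state == 0:  # first call for this text: compute all matches
            self.matches = [o for o in self.options if o.startswith(text)]
        return self.matches[state] if state < len(self.matches) else None

readline.set_completer(FlagCompleter(['--init_img', '--init_mask', '--init_color']).complete)
readline.parse_and_bind('tab: complete')
```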
@@ -15,6 +15,8 @@ import traceback
 import transformers
 import io
 import hashlib
+import cv2
+import skimage
 
 from omegaconf import OmegaConf
 from PIL import Image, ImageOps

@@ -222,6 +224,7 @@ class Generate:
             init_mask = None,
             fit = False,
             strength = None,
+            init_color = None,
             # these are specific to embiggen (which also relies on img2img args)
             embiggen = None,
             embiggen_tiles = None,

@@ -372,6 +375,11 @@ class Generate:
                 embiggen_tiles = embiggen_tiles,
             )
 
+            if init_color:
+                self.correct_colors(image_list = results,
+                                    reference_image_path = init_color,
+                                    image_callback = image_callback)
+
             if upscale is not None or gfpgan_strength > 0:
                 self.upscale_and_reconstruct(results,
                                              upscale = upscale,
@@ -485,6 +493,28 @@ class Generate:
 
         return self.model
 
+    def correct_colors(self,
+                       image_list,
+                       reference_image_path,
+                       image_callback = None):
+        reference_image = Image.open(reference_image_path)
+        correction_target = cv2.cvtColor(np.asarray(reference_image),
+                                         cv2.COLOR_RGB2LAB)
+        for r in image_list:
+            image, seed = r
+            image = cv2.cvtColor(np.asarray(image),
+                                 cv2.COLOR_RGB2LAB)
+            image = skimage.exposure.match_histograms(image,
+                                                      correction_target,
+                                                      channel_axis=2)
+            image = Image.fromarray(
+                cv2.cvtColor(image, cv2.COLOR_LAB2RGB).astype("uint8")
+            )
+            if image_callback is not None:
+                image_callback(image, seed)
+            else:
+                r[0] = image
+
     def upscale_and_reconstruct(self,
                                 image_list,
                                 upscale = None,
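For context, the `correct_colors` method added above converts both the reference and each generated image to the LAB color space, matches per-channel histograms with scikit-image, and converts back to RGB. A standalone sketch of the same idea (file names are hypothetical):

```python
import cv2
import numpy as np
from PIL import Image
from skimage import exposure

# hypothetical file names for illustration
reference = cv2.cvtColor(np.asarray(Image.open('original.png').convert('RGB')),
                         cv2.COLOR_RGB2LAB)
generated = cv2.cvtColor(np.asarray(Image.open('generated.png').convert('RGB')),
                         cv2.COLOR_RGB2LAB)

# match each LAB channel of the generated image to the reference histogram
matched = exposure.match_histograms(generated, reference, channel_axis=2)

# cast back to uint8 before converting LAB -> RGB
corrected = Image.fromarray(cv2.cvtColor(matched.astype('uint8'), cv2.COLOR_LAB2RGB))
corrected.save('corrected.png')
```

Matching in LAB rather than RGB keeps the lightness channel separate from the two color channels, which helps correct the repeated-img2img color drift without entangling luminance and hue.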
@@ -14,6 +14,7 @@ pillow
 pip>=22
 pudb
 pytorch-lightning
+scikit-image>=0.19
 streamlit
 # "CompVis/taming-transformers" IS NOT INSTALLABLE
 # This is a drop-in replacement