commit c98ade9b25
Merge branch 'development' into development
@@ -105,10 +105,11 @@ To run in full-precision mode, start `dream.py` with the `--full_precision` flag
 - [Seamless Tiling](docs/features/OTHER.md#seamless-tiling)
 - [Google Colab](docs/features/OTHER.md#google-colab)
 - [Web Server](docs/features/WEB.md)
-- [Reading Prompts From File](docs/features/OTHER.md#reading-prompts-from-a-file)
+- [Reading Prompts From File](docs/features/PROMPTS.md#reading-prompts-from-a-file)
 - [Shortcut: Reusing Seeds](docs/features/OTHER.md#shortcuts-reusing-seeds)
-- [Weighted Prompts](docs/features/OTHER.md#weighted-prompts)
+- [Weighted Prompts](docs/features/PROMPTS.md#weighted-prompts)
 - [Thresholding and Perlin Noise Initialization Options](/docs/features/OTHER.md#thresholding-and-perlin-noise-initialization-options)
+- [Negative/Unconditioned Prompts](docs/features/PROMPTS.md#negative-and-unconditioned-prompts)
 - [Variations](docs/features/VARIATIONS.md)
 - [Personalizing Text-to-Image Generation](docs/features/TEXTUAL_INVERSION.md)
 - [Simplified API for text to image generation](docs/features/OTHER.md#simplified-api)
@@ -40,6 +40,8 @@ def parameters_to_command(params):
         switches.append(f'-I {params["init_img"]}')
     if 'init_mask' in params and len(params['init_mask']) > 0:
         switches.append(f'-M {params["init_mask"]}')
+    if 'init_color' in params and len(params['init_color']) > 0:
+        switches.append(f'--init_color {params["init_color"]}')
     if 'strength' in params and 'init_img' in params:
         switches.append(f'-f {params["strength"]}')
     if 'fit' in params and params["fit"] == True:
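For illustration, here is a small self-contained sketch of what this switch-building logic emits once a request includes `init_color`. The helper name and the example values are hypothetical; the key checks mirror the hunk above.

```python
# Hypothetical helper mirroring the init_img/init_mask/init_color/strength
# branches of parameters_to_command(); the example values are made up.
def build_img2img_switches(params):
    switches = []
    if 'init_img' in params and len(params['init_img']) > 0:
        switches.append(f'-I {params["init_img"]}')
    if 'init_mask' in params and len(params['init_mask']) > 0:
        switches.append(f'-M {params["init_mask"]}')
    if 'init_color' in params and len(params['init_color']) > 0:
        switches.append(f'--init_color {params["init_color"]}')
    if 'strength' in params and 'init_img' in params:
        switches.append(f'-f {params["strength"]}')
    return switches

print(build_img2img_switches({
    'init_img':   'outputs/img2img-samples/000001.png',
    'init_color': 'photos/original.png',   # reference image for color correction
    'strength':   0.6,
}))
# -> ['-I outputs/img2img-samples/000001.png', '--init_color photos/original.png', '-f 0.6']
```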
@@ -128,6 +130,11 @@ def create_cmd_parser():
         type=str,
         help='Path to input mask for inpainting mode (supersedes width and height)',
     )
+    parser.add_argument(
+        '--init_color',
+        type=str,
+        help='Path to reference image for color correction (used for repeated img2img and inpainting)'
+    )
     parser.add_argument(
         '-T',
         '-fit',
BIN  docs/assets/negative_prompt_walkthru/step1.png  (new file, 451 KiB, binary not shown)
BIN  docs/assets/negative_prompt_walkthru/step2.png  (new file, 453 KiB, binary not shown)
BIN  docs/assets/negative_prompt_walkthru/step3.png  (new file, 463 KiB, binary not shown)
BIN  docs/assets/negative_prompt_walkthru/step4.png  (new file, 435 KiB, binary not shown)
@@ -154,11 +154,17 @@ vary greatly depending on what is in the image. We also ask to --fit the image i
 than 640x480. Otherwise the image size will be identical to the provided photo and you may run out
 of memory if it is large.
 
+Repeated chaining of img2img on an image can result in significant color shifts
+in the output, especially if run with lower strength. Color correction can be
+run against a reference image to fix this issue. Use the original input image to the
+chain as the reference image for each step in the chain.
+
 In addition to the command-line options recognized by txt2img, img2img accepts additional options:
 
 | Argument            | Shortcut  | Default | Description |
 | ------------------- | --------- | ------- | ----------- |
 | --init_img <path>   | -I<path>  | None    | Path to the initialization image |
+| --init_color <path> |           | None    | Path to reference image for color correction |
 | --fit               | -F        | False   | Scale the image to fit into the specified -H and -W dimensions |
 | --strength <float>  | -s<float> | 0.75    | How hard to try to match the prompt to the initial image. Ranges from 0.0-0.99, with higher values replacing the initial image completely. |
 
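The color-corrected img2img chain described above can also be driven from Python. Below is a minimal sketch, not part of the patch: the `ldm.generate` import path and all file names are assumptions, while the `Generate` class and the `prompt2image()` parameters `init_img`, `strength`, and `init_color` come from the code changes in this commit.

```python
# Hedged sketch of a color-corrected img2img chain; the import path and file
# paths are illustrative assumptions, the keyword arguments mirror this commit.
from ldm.generate import Generate   # assumed module path

gr       = Generate()
original = 'photos/hydrant.png'     # hypothetical starting photo
prompt   = 'a fire hydrant in a lush meadow'

current = original
for step in range(3):
    results = gr.prompt2image(
        prompt,
        init_img   = current,
        strength   = 0.4,           # low strength = subtle change per step
        init_color = original,      # always correct against the ORIGINAL image
    )
    image, seed = results[0]
    current = f'chain_step_{step}.png'   # hypothetical output path
    image.save(current)
```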
@@ -28,32 +28,6 @@ dream> "pond garden with lotus by claude monet" --seamless -s100 -n4
 
 ---
 
-## **Reading Prompts from a File**
-
-You can automate `dream.py` by providing a text file with the prompts you want to run, one line per
-prompt. The text file must be composed with a text editor (e.g. Notepad) and not a word processor.
-Each line should look like what you would type at the dream> prompt:
-
-```bash
-a beautiful sunny day in the park, children playing -n4 -C10
-stormy weather on a mountain top, goats grazing -s100
-innovative packaging for a squid's dinner -S137038382
-```
-
-Then pass this file's name to `dream.py` when you invoke it:
-
-```bash
-(ldm) ~/stable-diffusion$ python3 scripts/dream.py --from_file "path/to/prompts.txt"
-```
-
-You may read a series of prompts from standard input by providing a filename of `-`:
-
-```bash
-(ldm) ~/stable-diffusion$ echo "a beautiful day" | python3 scripts/dream.py --from_file -
-```
-
----
-
 ## **Shortcuts: Reusing Seeds**
 
 Since it is so common to reuse seeds while refining a prompt, there is now a shortcut as of version
docs/features/PROMPTS.md (new file, 96 lines)

@@ -0,0 +1,96 @@
+# Prompting Features
+
+## **Reading Prompts from a File**
+
+You can automate `dream.py` by providing a text file with the prompts you want to run, one line per
+prompt. The text file must be composed with a text editor (e.g. Notepad) and not a word processor.
+Each line should look like what you would type at the dream> prompt:
+
+```bash
+a beautiful sunny day in the park, children playing -n4 -C10
+stormy weather on a mountain top, goats grazing -s100
+innovative packaging for a squid's dinner -S137038382
+```
+
+Then pass this file's name to `dream.py` when you invoke it:
+
+```bash
+(ldm) ~/stable-diffusion$ python3 scripts/dream.py --from_file "path/to/prompts.txt"
+```
+
+You may read a series of prompts from standard input by providing a filename of `-`:
+
+```bash
+(ldm) ~/stable-diffusion$ echo "a beautiful day" | python3 scripts/dream.py --from_file -
+```
+
+---
+
+## **Weighted Prompts**
+
+You may weight different sections of the prompt to tell the sampler to attach different levels of
+priority to them, by adding `:(number)` to the end of the section you wish to up- or downweight. For
+example, consider this prompt:
+
+```bash
+tabby cat:0.25 white duck:0.75 hybrid
+```
+
+This will tell the sampler to invest 25% of its effort on the tabby cat aspect of the image and 75%
+on the white duck aspect (surprisingly, this example actually works). The prompt weights can use any
+combination of integers and floating point numbers, and they do not need to add up to 1.
+
+---
+
+## **Negative and Unconditioned Prompts**
+
+Stable Diffusion's model will try to ignore any words placed between a pair of square brackets when generating the image.
+
+```bash
+this is a test prompt [not really] to make you understand [cool] how this works.
+```
+
+In the prompt above, the words 'not really' and 'cool' will be ignored by Stable Diffusion.
+
+Here's a walkthrough that shows the effect.
+
+original prompt:
+
+```bash
+"A fantastical translucent poney made of water and foam, ethereal, radiant, hyperalism, scottish folklore, digital painting, artstation, concept art, smooth, 8 k frostbite 3 engine, ultra detailed, art by artgerm and greg rutkowski and magali villeneuve" -s 20 -W 512 -H 768 -C 7.5 -A k_euler_a -S 1654590180
+```
+
+
+
+That image has a woman, so if we want the horse without a rider, we can influence the image not to have a woman by putting [woman] in the prompt, like this:
+
+```bash
+"A fantastical translucent poney made of water and foam, ethereal, radiant, hyperalism, scottish folklore, digital painting, artstation, concept art, smooth, 8 k frostbite 3 engine, ultra detailed, art by artgerm and greg rutkowski and magali villeneuve [woman]" -s 20 -W 512 -H 768 -C 7.5 -A k_euler_a -S 1654590180
+```
+
+
+
+That's nice, but say we also don't want the image to be quite so blue. We can add "blue" to the list of negative prompts, so it's now [woman blue]:
+
+```bash
+"A fantastical translucent poney made of water and foam, ethereal, radiant, hyperalism, scottish folklore, digital painting, artstation, concept art, smooth, 8 k frostbite 3 engine, ultra detailed, art by artgerm and greg rutkowski and magali villeneuve [woman blue]" -s 20 -W 512 -H 768 -C 7.5 -A k_euler_a -S 1654590180
+```
+
+
+
+
+Getting close, but there's no sense in having a saddle when our horse doesn't have a rider, so we'll add one more negative prompt: [woman blue saddle].
+
+```bash
+"A fantastical translucent poney made of water and foam, ethereal, radiant, hyperalism, scottish folklore, digital painting, artstation, concept art, smooth, 8 k frostbite 3 engine, ultra detailed, art by artgerm and greg rutkowski and magali villeneuve [woman blue saddle]" -s 20 -W 512 -H 768 -C 7.5 -A k_euler_a -S 1654590180
+```
+
+
+
+
+Notes about this feature:
+
+* The only requirement for words to be ignored is that they are enclosed in a pair of square brackets.
+* You can provide multiple words within the same bracket.
+* You can provide multiple brackets with multiple words in different places in your prompt. That works just fine.
+* To improve typical anatomy problems, you can add negative prompts like [bad anatomy, extra legs, extra arms, extra fingers, poorly drawn hands, poorly drawn feet, disfigured, out of frame, tiling, bad art, deformed, mutated].
@@ -102,6 +102,7 @@ generate more variations around the almost-but-not-quite image. We do the
 latter, using both the `-V` (combining) and `-v` (variation strength) options.
 Note that we use `-n6` to generate 6 variations:
 
+```bash
 dream> "prompt" -S3357757885 -V3647897225,0.1,1614299449,0.1 -v0.05 -n6
 Outputs:
 ./outputs/Xena/000004.3279757577.png: "prompt" -s50 -W512 -H512 -C7.5 -Ak_lms -V 3647897225:0.1,1614299449:0.1,3279757577:0.05 -S3357757885
@@ -181,6 +181,10 @@ class Args(object):
            switches.append('--seamless')
        if a['init_img'] and len(a['init_img'])>0:
            switches.append(f'-I {a["init_img"]}')
+       if a['init_mask'] and len(a['init_mask'])>0:
+           switches.append(f'-M {a["init_mask"]}')
+       if a['init_color'] and len(a['init_color'])>0:
+           switches.append(f'--init_color {a["init_color"]}')
        if a['fit']:
            switches.append(f'--fit')
        if a['init_img'] and a['strength'] and a['strength']>0:
@@ -493,6 +497,11 @@ class Args(object):
            type=str,
            help='Path to input mask for inpainting mode (supersedes width and height)',
        )
+       img2img_group.add_argument(
+           '--init_color',
+           type=str,
+           help='Path to reference image for color correction (used for repeated img2img and inpainting)'
+       )
        img2img_group.add_argument(
            '-T',
            '-fit',
@@ -22,7 +22,8 @@ class Completer:
     def complete(self, text, state):
         buffer = readline.get_line_buffer()
 
-        if text.startswith(('-I', '--init_img','-M','--init_mask')):
+        if text.startswith(('-I', '--init_img','-M','--init_mask',
+                            '--init_color')):
             return self._path_completions(text, state, ('.png','.jpg','.jpeg'))
 
         if buffer.strip().endswith('cd') or text.startswith(('.', '/')):
@@ -57,6 +58,8 @@ class Completer:
             path = text.replace('--init_mask=', '', 1).lstrip()
         elif text.startswith('-M'):
             path = text.replace('-M', '', 1).lstrip()
+        elif text.startswith('--init_color='):
+            path = text.replace('--init_color=', '', 1).lstrip()
         else:
             path = text
 
@@ -100,6 +103,7 @@ if readline_available:
        '--individual','-i',
        '--init_img','-I',
        '--init_mask','-M',
+       '--init_color',
        '--strength','-f',
        '--variants','-v',
        '--outdir','-o',
@@ -15,6 +15,8 @@ import traceback
 import transformers
 import io
 import hashlib
+import cv2
+import skimage
 
 from omegaconf import OmegaConf
 from PIL import Image, ImageOps
@@ -222,6 +224,7 @@ class Generate:
            init_mask = None,
            fit = False,
            strength = None,
+           init_color = None,
            # these are specific to embiggen (which also relies on img2img args)
            embiggen = None,
            embiggen_tiles = None,
@@ -372,6 +375,11 @@ class Generate:
                embiggen_tiles = embiggen_tiles,
            )

+           if init_color:
+               self.correct_colors(image_list = results,
+                                   reference_image_path = init_color,
+                                   image_callback = image_callback)
+
            if upscale is not None or gfpgan_strength > 0:
                self.upscale_and_reconstruct(results,
                                             upscale = upscale,
@@ -485,6 +493,28 @@ class Generate:
 
        return self.model
 
+   def correct_colors(self,
+                      image_list,
+                      reference_image_path,
+                      image_callback = None):
+       reference_image = Image.open(reference_image_path)
+       correction_target = cv2.cvtColor(np.asarray(reference_image),
+                                        cv2.COLOR_RGB2LAB)
+       for r in image_list:
+           image, seed = r
+           image = cv2.cvtColor(np.asarray(image),
+                                cv2.COLOR_RGB2LAB)
+           image = skimage.exposure.match_histograms(image,
+                                                     correction_target,
+                                                     channel_axis=2)
+           image = Image.fromarray(
+               cv2.cvtColor(image, cv2.COLOR_LAB2RGB).astype("uint8")
+           )
+           if image_callback is not None:
+               image_callback(image, seed)
+           else:
+               r[0] = image
+
    def upscale_and_reconstruct(self,
                                image_list,
                                upscale = None,
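For readers who want the color-correction step in isolation, here is a minimal standalone sketch of the same LAB histogram-matching approach `correct_colors()` uses: convert both images to LAB with OpenCV, match histograms per channel with scikit-image, then convert back to RGB. The function name and file paths are illustrative; `channel_axis` requires scikit-image 0.19 or newer, which lines up with the requirements change in the next hunk.

```python
# Standalone sketch of the LAB histogram-matching correction used by
# correct_colors(); the function name and the file paths are illustrative.
import cv2
import numpy as np
import skimage.exposure
from PIL import Image

def match_colors(image_path, reference_path, output_path):
    reference = cv2.cvtColor(np.asarray(Image.open(reference_path).convert('RGB')),
                             cv2.COLOR_RGB2LAB)
    image     = cv2.cvtColor(np.asarray(Image.open(image_path).convert('RGB')),
                             cv2.COLOR_RGB2LAB)
    # Match each LAB channel of the generated image to the reference histogram.
    matched = skimage.exposure.match_histograms(image, reference, channel_axis=2)
    rgb = cv2.cvtColor(matched.astype('uint8'), cv2.COLOR_LAB2RGB)
    Image.fromarray(rgb).save(output_path)

match_colors('chain_step_2.png', 'photos/original.png', 'chain_step_2_corrected.png')
```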
@@ -14,6 +14,7 @@ pillow
 pip>=22
 pudb
 pytorch-lightning
+scikit-image>=0.19
 streamlit
 # "CompVis/taming-transformers" IS NOT INSTALLABLE
 # This is a drop-in replacement