implementation of RFC #266 (#587)

* Feature complete for #266 with the exception of several small deviations: 1. initial image and model weight hashes use the full sha256 hash rather than the first 8 digits 2. Initialization parameters for post-processing steps are not provided 3. Uses top-level "images" tags for both a single image and a grid of images. This change was suggested in a comment. * Added scripts/sd-metadata.py to retrieve and print metadata from PNG files * New ldm.dream.args.Args class is a namespace-like object which holds all defaults and can be modified during execution to hold current settings. * Modified dream.py and server.py to accommodate Args class.
2024-08-30 20:32:17 +00:00 · 2022-09-16 13:09:04 -04:00
parent 45af30f3a4
commit 403d02d94f
7 changed files with 768 additions and 433 deletions
--- a/scripts/dream.py
+++ b/scripts/dream.py
@ -1,8 +1,6 @@
 #!/usr/bin/env python3
 # Copyright (c) 2022 Lincoln D. Stein (https://github.com/lstein)

-import argparse
-import shlex
 import os
 import re
 import sys
@ -10,7 +8,8 @@ import copy
 import warnings
 import time
 import ldm.dream.readline
-from ldm.dream.pngwriter import PngWriter, PromptFormatter
+from ldm.dream.args import Args, format_metadata
+from ldm.dream.pngwriter import PngWriter
 from ldm.dream.server import DreamServer, ThreadingDreamServer
 from ldm.dream.image_util import make_grid
 from omegaconf import OmegaConf
@ -22,14 +21,16 @@ output_cntr = 0

 def main():
    """Initialize command-line parsers and the diffusion model"""
-    arg_parser = create_argv_parser()
-    opt = arg_parser.parse_args()
+    opt  = Args()
+    args = opt.parse_args()
+    if not args:
+        sys.exit(-1)

-    if opt.laion400m:
+    if args.laion400m:
        print('--laion400m flag has been deprecated. Please use --model laion400m instead.')
        sys.exit(-1)
-    if opt.weights != 'model':
-        print('--weights argument has been deprecated. Please configure ./configs/models.yaml, and call it using --model instead.')
+    if args.weights:
+        print('--weights argument has been deprecated. Please edit ./configs/models.yaml, and select the weights using --model instead.')
        sys.exit(-1)

    print('* Initializing, be patient...\n')
@ -47,7 +48,7 @@ def main():
    # the user input loop
    try:
        gen = Generate(
-            conf           = opt.config,
+            conf           = opt.conf,
            model          = opt.model,
            sampler_name   = opt.sampler_name,
            embedding_path = opt.embedding_path,
@ -91,11 +92,10 @@ def main():
        dream_server_loop(gen, opt.host, opt.port, opt.outdir)
        sys.exit(0)

-    cmd_parser = create_cmd_parser()
-    main_loop(gen, opt.outdir, opt.prompt_as_dir, cmd_parser, infile)
+    main_loop(gen, opt, infile)

 # TODO: main_loop() has gotten busy. Needs to be refactored.
-def main_loop(gen, outdir, prompt_as_dir, parser, infile):
+def main_loop(gen, opt, infile):
    """prompt/read/execute loop"""
    done = False
    path_filter = re.compile(r'[<>:"/\\|?*]')
@ -103,8 +103,8 @@ def main_loop(gen, outdir, prompt_as_dir, parser, infile):

    # os.pathconf is not available on Windows
    if hasattr(os, 'pathconf'):
-        path_max = os.pathconf(outdir, 'PC_PATH_MAX')
-        name_max = os.pathconf(outdir, 'PC_NAME_MAX')
+        path_max = os.pathconf(opt.outdir, 'PC_PATH_MAX')
+        name_max = os.pathconf(opt.outdir, 'PC_NAME_MAX')
    else:
        path_max = 260
        name_max = 255
@ -123,41 +123,17 @@ def main_loop(gen, outdir, prompt_as_dir, parser, infile):
        if command.startswith(('#', '//')):
            continue

-        # before splitting, escape single quotes so as not to mess
-        # up the parser
-        command = command.replace("'", "\\'")
-
-        try:
-            elements = shlex.split(command)
-        except ValueError as e:
-            print(str(e))
-            continue
-
-        if elements[0] == 'q':
+        if command.startswith('q '):
            done = True
            break

-        if elements[0].startswith(
+        if command.startswith(
            '!dream'
        ):   # in case a stored prompt still contains the !dream command
-            elements.pop(0)
-
-        # rearrange the arguments to mimic how it works in the Dream bot.
-        switches = ['']
-        switches_started = False
-
-        for el in elements:
-            if el[0] == '-' and not switches_started:
-                switches_started = True
-            if switches_started:
-                switches.append(el)
-            else:
-                switches[0] += el
-                switches[0] += ' '
-        switches[0] = switches[0][: len(switches[0]) - 1]
+            command.replace('!dream','',1)

        try:
-            opt = parser.parse_args(switches)
+            parser = opt.parse_cmd(command)
        except SystemExit:
            parser.print_help()
            continue
@ -185,6 +161,7 @@ def main_loop(gen, outdir, prompt_as_dir, parser, infile):
                opt.seed = None
                continue

+        # TODO - move this into a module
        if opt.with_variations is not None:
            # shotgun parsing, woo
            parts = []
@ -220,7 +197,7 @@ def main_loop(gen, outdir, prompt_as_dir, parser, infile):

            # truncate path to maximum allowed length
            # 27 is the length of '######.##########.##.png', plus two separators and a NUL
-            subdir = subdir[:(path_max - 27 - len(os.path.abspath(outdir)))]
+            subdir = subdir[:(path_max - 27 - len(os.path.abspath(opt.outdir)))]
            current_outdir = os.path.join(outdir, subdir)

            print('Writing files to directory: "' + current_outdir + '"')
@ -248,31 +225,36 @@ def main_loop(gen, outdir, prompt_as_dir, parser, infile):
                        filename = f'{prefix}.{seed}.postprocessed.png'
                    else:
                        filename = f'{prefix}.{seed}.png'
+                    # the handling of variations is probably broken
+                    # Also, given the ability to add stuff to the dream_prompt_str, it isn't
+                    # necessary to make a copy of the opt option just to change its attributes
                    if opt.variation_amount > 0:
-                        iter_opt = argparse.Namespace(**vars(opt))  # copy
+                        iter_opt       = copy.copy(opt)
                        this_variation = [[seed, opt.variation_amount]]
                        if opt.with_variations is None:
                            iter_opt.with_variations = this_variation
                        else:
                            iter_opt.with_variations = opt.with_variations + this_variation
                        iter_opt.variation_amount = 0
-                        normalized_prompt = PromptFormatter(
-                            gen, iter_opt).normalize_prompt()
-                        metadata_prompt = f'{normalized_prompt} -S{iter_opt.seed}'
+                        formatted_dream_prompt = iter_opt.dream_prompt_str(seed=seed)
                    elif opt.with_variations is not None:
-                        normalized_prompt = PromptFormatter(
-                            gen, opt).normalize_prompt()
-                        # use the original seed - the per-iteration value is the last variation-seed
-                        metadata_prompt = f'{normalized_prompt} -S{opt.seed}'
+                        formatted_dream_prompt = opt.dream_prompt_str(seed=seed)
                    else:
-                        normalized_prompt = PromptFormatter(
-                            gen, opt).normalize_prompt()
-                        metadata_prompt = f'{normalized_prompt} -S{seed}'
+                        formatted_dream_prompt = opt.dream_prompt_str(seed=seed)
                    path = file_writer.save_image_and_prompt_to_png(
-                        image, metadata_prompt, filename)
+                        image           = image,
+                        dream_prompt    = formatted_dream_prompt,
+                        metadata        = format_metadata(
+                            opt,
+                            seeds      = [seed],
+                            weights    = gen.weights,
+                            model_hash = gen.model_hash,
+                        ),
+                        name      = filename,
+                    )
                    if (not upscaled) or opt.save_original:
                        # only append to results if we didn't overwrite an earlier output
-                        results.append([path, metadata_prompt])
+                        results.append([path, formatted_dream_prompt])
                last_results.append([path, seed])

            catch_ctrl_c = infile is None # if running interactively, we catch keyboard interrupts
@ -286,15 +268,22 @@ def main_loop(gen, outdir, prompt_as_dir, parser, infile):
                grid_img   = make_grid(list(grid_images.values()))
                grid_seeds = list(grid_images.keys())
                first_seed = last_results[0][1]
-                filename = f'{prefix}.{first_seed}.png'
-                # TODO better metadata for grid images
-                normalized_prompt = PromptFormatter(
-                    gen, opt).normalize_prompt()
-                metadata_prompt = f'{normalized_prompt} -S{first_seed} --grid -n{len(grid_images)} # {grid_seeds}'
+                filename   = f'{prefix}.{first_seed}.png'
+                formatted_dream_prompt  = opt.dream_prompt_str(seed=first_seed,grid=True,iterations=len(grid_images))
+                formatted_dream_prompt += f' # {grid_seeds}'
+                metadata = format_metadata(
+                    opt,
+                    seeds      = grid_seeds,
+                    weights    = gen.weights,
+                    model_hash = gen.model_hash
+                    )
                path = file_writer.save_image_and_prompt_to_png(
-                    grid_img, metadata_prompt, filename
+                    image        = grid_img,
+                    dream_prompt = formatted_dream_prompt,
+                    metadata     = metadata,
+                    name         = filename
                )
-                results = [[path, metadata_prompt]]
+                results = [[path, formatted_dream_prompt]]

        except AssertionError as e:
            print(e)
@ -325,7 +314,6 @@ def get_next_command(infile=None) -> str:  # command string
            print(f'#{command}')
    return command

-
 def dream_server_loop(gen, host, port, outdir):
    print('\n* --web was specified, starting web server...')
    # Change working directory to the stable-diffusion directory
@ -365,315 +353,5 @@ def write_log_message(results, log_path):
    with open(log_path, 'a', encoding='utf-8') as file:
        file.writelines(log_lines)

-
-SAMPLER_CHOICES = [
-    'ddim',
-    'k_dpm_2_a',
-    'k_dpm_2',
-    'k_euler_a',
-    'k_euler',
-    'k_heun',
-    'k_lms',
-    'plms',
-]
-
-
-def create_argv_parser():
-    parser = argparse.ArgumentParser(
-        description="""Generate images using Stable Diffusion.
-        Use --web to launch the web interface. 
-        Use --from_file to load prompts from a file path or standard input ("-").
-        Otherwise you will be dropped into an interactive command prompt (type -h for help.)
-        Other command-line arguments are defaults that can usually be overridden
-        prompt the command prompt.
-"""
-    )
-    parser.add_argument(
-        '--laion400m',
-        '--latent_diffusion',
-        '-l',
-        dest='laion400m',
-        action='store_true',
-        help='Fallback to the latent diffusion (laion400m) weights and config',
-    )
-    parser.add_argument(
-        '--from_file',
-        dest='infile',
-        type=str,
-        help='If specified, load prompts from this file',
-    )
-    parser.add_argument(
-        '-n',
-        '--iterations',
-        type=int,
-        default=1,
-        help='Number of images to generate',
-    )
-    parser.add_argument(
-        '-F',
-        '--full_precision',
-        dest='full_precision',
-        action='store_true',
-        help='Use more memory-intensive full precision math for calculations',
-    )
-    parser.add_argument(
-        '-g',
-        '--grid',
-        action='store_true',
-        help='Generate a grid instead of individual images',
-    )
-    parser.add_argument(
-        '-A',
-        '-m',
-        '--sampler',
-        dest='sampler_name',
-        choices=SAMPLER_CHOICES,
-        metavar='SAMPLER_NAME',
-        default='k_lms',
-        help=f'Set the initial sampler. Default: k_lms. Supported samplers: {", ".join(SAMPLER_CHOICES)}',
-    )
-    parser.add_argument(
-        '--outdir',
-        '-o',
-        type=str,
-        default='outputs/img-samples',
-        help='Directory to save generated images and a log of prompts and seeds. Default: outputs/img-samples',
-    )
-    parser.add_argument(
-        '--seamless',
-        action='store_true',
-        help='Change the model to seamless tiling (circular) mode',
-    )
-    parser.add_argument(
-        '--embedding_path',
-        type=str,
-        help='Path to a pre-trained embedding manager checkpoint - can only be set on command line',
-    )
-    parser.add_argument(
-        '--prompt_as_dir',
-        '-p',
-        action='store_true',
-        help='Place images in subdirectories named after the prompt.',
-    )
-    # GFPGAN related args
-    parser.add_argument(
-        '--gfpgan_bg_upsampler',
-        type=str,
-        default='realesrgan',
-        help='Background upsampler. Default: realesrgan. Options: realesrgan, none.',
-
-    )
-    parser.add_argument(
-        '--gfpgan_bg_tile',
-        type=int,
-        default=400,
-        help='Tile size for background sampler, 0 for no tile during testing. Default: 400.',
-    )
-    parser.add_argument(
-        '--gfpgan_model_path',
-        type=str,
-        default='experiments/pretrained_models/GFPGANv1.3.pth',
-        help='Indicates the path to the GFPGAN model, relative to --gfpgan_dir.',
-    )
-    parser.add_argument(
-        '--gfpgan_dir',
-        type=str,
-        default='./src/gfpgan',
-        help='Indicates the directory containing the GFPGAN code.',
-    )
-    parser.add_argument(
-        '--web',
-        dest='web',
-        action='store_true',
-        help='Start in web server mode.',
-    )
-    parser.add_argument(
-        '--host',
-        type=str,
-        default='127.0.0.1',
-        help='Web server: Host or IP to listen on. Set to 0.0.0.0 to accept traffic from other devices on your network.'
-    )
-    parser.add_argument(
-        '--port',
-        type=int,
-        default='9090',
-        help='Web server: Port to listen on'
-    )
-    parser.add_argument(
-        '--weights',
-        default='model',
-        help='Indicates the Stable Diffusion model to use.',
-    )
-    parser.add_argument(
-        '--model',
-        default='stable-diffusion-1.4',
-        help='Indicates which diffusion model to load. (currently "stable-diffusion-1.4" (default) or "laion400m")',
-    )
-    parser.add_argument(
-        '--config',
-        default='configs/models.yaml',
-        help='Path to configuration file for alternate models.',
-    )
-    return parser
-
-
-def create_cmd_parser():
-    parser = argparse.ArgumentParser(
-        description='Example: dream> a fantastic alien landscape -W1024 -H960 -s100 -n12'
-    )
-    parser.add_argument('prompt')
-    parser.add_argument('-s', '--steps', type=int, help='Number of steps')
-    parser.add_argument(
-        '-S',
-        '--seed',
-        type=int,
-        help='Image seed; a +ve integer, or use -1 for the previous seed, -2 for the one before that, etc',
-    )
-    parser.add_argument(
-        '-n',
-        '--iterations',
-        type=int,
-        default=1,
-        help='Number of samplings to perform (slower, but will provide seeds for individual images)',
-    )
-    parser.add_argument(
-        '-W', '--width', type=int, help='Image width, multiple of 64'
-    )
-    parser.add_argument(
-        '-H', '--height', type=int, help='Image height, multiple of 64'
-    )
-    parser.add_argument(
-        '-C',
-        '--cfg_scale',
-        default=7.5,
-        type=float,
-        help='Classifier free guidance (CFG) scale - higher numbers cause generator to "try" harder.',
-    )
-    parser.add_argument(
-        '-g', '--grid', action='store_true', help='generate a grid'
-    )
-    parser.add_argument(
-        '--outdir',
-        '-o',
-        type=str,
-        default=None,
-        help='Directory to save generated images and a log of prompts and seeds',
-    )
-    parser.add_argument(
-        '--seamless',
-        action='store_true',
-        help='Change the model to seamless tiling (circular) mode',
-    )
-    parser.add_argument(
-        '-i',
-        '--individual',
-        action='store_true',
-        help='Generate individual files (default)',
-    )
-    parser.add_argument(
-        '-I',
-        '--init_img',
-        type=str,
-        help='Path to input image for img2img mode (supersedes width and height)',
-    )
-    parser.add_argument(
-        '-M',
-        '--init_mask',
-        type=str,
-        help='Path to input mask for inpainting mode (supersedes width and height)',
-    )
-    parser.add_argument(
-        '-T',
-        '-fit',
-        '--fit',
-        action='store_true',
-        help='If specified, will resize the input image to fit within the dimensions of width x height (512x512 default)',
-    )
-    parser.add_argument(
-        '-f',
-        '--strength',
-        default=0.75,
-        type=float,
-        help='Strength for noising/unnoising. 0.0 preserves image exactly, 1.0 replaces it completely',
-    )
-    parser.add_argument(
-        '-G',
-        '--gfpgan_strength',
-        default=0,
-        type=float,
-        help='The strength at which to apply the GFPGAN model to the result, in order to improve faces.',
-    )
-    parser.add_argument(
-        '-U',
-        '--upscale',
-        nargs='+',
-        default=None,
-        type=float,
-        help='Scale factor (2, 4) for upscaling final output followed by upscaling strength (0-1.0). If strength not specified, defaults to 0.75'
-    )
-    parser.add_argument(
-        '-save_orig',
-        '--save_original',
-        action='store_true',
-        help='Save original. Use it when upscaling to save both versions.',
-    )
-    parser.add_argument(
-        '-embiggen',
-        '--embiggen',
-        nargs='+',
-        default=None,
-        type=float,
-        help='Embiggen tiled img2img for higher resolution and detail without extra VRAM usage. Takes scale factor relative to the size of the --init_img (-I), followed by ESRGAN upscaling strength (0-1.0), followed by minimum amount of overlap between tiles as a decimal ratio (0 - 1.0) or number of pixels. ESRGAN strength defaults to 0.75, and overlap defaults to 0.25 . ESRGAN is used to upscale the init prior to cutting it into tiles/pieces to run through img2img and then stitch back togeather.',
-    )
-    parser.add_argument(
-        '-embiggen_tiles',
-        '--embiggen_tiles',
-        nargs='+',
-        default=None,
-        type=int,
-        help='If while doing Embiggen we are altering only parts of the image, takes a list of tiles by number to process and replace onto the image e.g. `1 3 5`, useful for redoing problematic spots from a prior Embiggen run',
-    )
-    # variants is going to be superseded by a generalized "prompt-morph" function
-    #    parser.add_argument('-v','--variants',type=int,help="in img2img mode, the first generated image will get passed back to img2img to generate the requested number of variants")
-    parser.add_argument(
-        '-x',
-        '--skip_normalize',
-        action='store_true',
-        help='Skip subprompt weight normalization',
-    )
-    parser.add_argument(
-        '-A',
-        '-m',
-        '--sampler',
-        dest='sampler_name',
-        default=None,
-        type=str,
-        choices=SAMPLER_CHOICES,
-        metavar='SAMPLER_NAME',
-        help=f'Switch to a different sampler. Supported samplers: {", ".join(SAMPLER_CHOICES)}',
-    )
-    parser.add_argument(
-        '-t',
-        '--log_tokenization',
-        action='store_true',
-        help='shows how the prompt is split into tokens'
-    )
-    parser.add_argument(
-        '-v',
-        '--variation_amount',
-        default=0.0,
-        type=float,
-        help='If > 0, generates variations on the initial seed instead of random seeds per iteration. Must be between 0 and 1. Higher values will be more different.'
-    )
-    parser.add_argument(
-        '-V',
-        '--with_variations',
-        default=None,
-        type=str,
-        help='list of variations to apply, in the format `seed:weight,seed:weight,...'
-    )
-    return parser
-
-
 if __name__ == '__main__':
    main()
--- a/scripts/sd-metadata.py
+++ b/scripts/sd-metadata.py
@ -0,0 +1,22 @@
+#!/usr/bin/env python
+"""Print the metadata stored in dream.py-generated PNG files named on the command line."""
+import sys
+import json
+from ldm.dream.pngwriter import retrieve_metadata
+
+if len(sys.argv) < 2:
+    print("Usage: sd-metadata.py <file1.png> <file2.png> <file3.png>...")
+    print("This script opens up the indicated dream.py-generated PNG file(s) and prints out their metadata.")
+    # sys.exit() rather than the bare exit(): the latter is a helper injected by the site module
+    sys.exit(-1)
+
+# A missing or unreadable file is reported on stderr and the loop moves on to the next one.
+filenames = sys.argv[1:]
+for f in filenames:
+    try:
+        metadata = retrieve_metadata(f)
+        print(f'{f}:\n',json.dumps(metadata, indent=4))
+    except FileNotFoundError:
+        sys.stderr.write(f'{f} not found\n')
+    except PermissionError:
+        sys.stderr.write(f'{f} could not be opened due to inadequate permissions\n')