add documentation and bug fixes

- normalized how filenames are written out when postprocessing is invoked - various fixes of bugs encountered during testing - updated documentation - updated help text
2024-08-30 20:32:17 +00:00 · 2022-09-28 11:48:11 -04:00
parent fe00a8c05c
commit 743342816b
6 changed files with 166 additions and 48 deletions
--- a/docs/features/CLI.md
+++ b/docs/features/CLI.md
@ -205,6 +205,85 @@ well as the --mask (-M) argument:
 | --init_mask <path> | -M<path>   | None                |Path to an image the same size as the initial_image, with areas for inpainting made transparent.|
 # Convenience commands
 In addition to the standard image generation arguments, there are a
 series of convenience commands that begin with !:
 ## !fix
 This command runs a post-processor on a previously-generated image. It
 takes a PNG filename or path and applies your choice of the -U, -G, or
 --embiggen switches in order to fix faces or upscale. If you provide a
 filename, the script will look for it in the current output
 directory. Otherwise you can provide a full or partial path to the
 desired file.
 Some examples:
 Upscale to 4X its original size and fix faces using codeformer:
 ~~~
 dream> !fix 0000045.4829112.png -G1 -U4 -ft codeformer
 ~~~
 Use the GFPGAN algorithm to fix faces, then upscale to 3X using --embiggen:
 ~~~
 dream> !fix 0000045.4829112.png -G0.8 -ft gfpgan
 >> fixing outputs/img-samples/0000045.4829112.png
 >> retrieved seed 4829112 and prompt "boy enjoying a banana split"
 >> GFPGAN - Restoring Faces for image seed:4829112
 Outputs:
 [1] outputs/img-samples/000017.4829112.gfpgan-00.png: !fix "outputs/img-samples/0000045.4829112.png" -s 50 -S 4829112 -W 512 -H 512 -C 7.5 -A k_lms -G 0.8
 dream> !fix 000017.4829112.gfpgan-00.png --embiggen 3
 ...lots of text...
 Outputs:
 [2] outputs/img-samples/000018.2273800735.embiggen-00.png: !fix "outputs/img-samples/000017.4829112.gfpgan-00.png" -s 50 -S 2273800735 -W 512 -H 512 -C 7.5 -A k_lms --embiggen 3.0 0.75 0.25
 ~~~
 ## !fetch
 This command retrieves the generation parameters from a previously
 generated image and either loads them into the command line
 (Linux|Mac), or prints them out in a comment for copy-and-paste
 (Windows). You may provide either the name of a file in the current
 output directory, or a full file path.
 ~~~
 dream> !fetch 0000015.8929913.png
 # the script returns the next line, ready for editing and running:
 dream> a fantastic alien landscape -W 576 -H 512 -s 60 -A plms -C 7.5
 ~~~
 Note that this command may behave unexpectedly if given a PNG file that
 was not generated by InvokeAI.
 ## !history
 The dream script keeps track of all the commands you issue during a
 session, allowing you to re-run them. On Mac and Linux systems, it
 also writes the command-line history out to disk, giving you access to
 the most recent 1000 commands issued.
 The `!history` command will return a numbered list of all the commands
 issued during the session (Windows), or the most recent 1000 commands
 (Mac|Linux). You can then repeat a command by using the command !NNN,
 where "NNN" is the history line number. For example:
 ~~~
 dream> !history
 ...
 [14] happy woman sitting under tree wearing broad hat and flowing garment
 [15] beautiful woman sitting under tree wearing broad hat and flowing garment
 [18] beautiful woman sitting under tree wearing broad hat and flowing garment -v0.2 -n6
 [20] watercolor of beautiful woman sitting under tree wearing broad hat and flowing garment -v0.2 -n6 -S2878767194
 [21] surrealist painting of beautiful woman sitting under tree wearing broad hat and flowing garment -v0.2 -n6 -S2878767194
 ...
 dream> !20
 dream> watercolor of beautiful woman sitting under tree wearing broad hat and flowing garment -v0.2 -n6 -S2878767194
 ~~~
 # Command-line editing and completion
 If you are on a Macintosh or Linux machine, the command-line offers
--- a/ldm/dream/args.py
+++ b/ldm/dream/args.py
@ -464,8 +464,36 @@ class Args(object):
    def _create_dream_cmd_parser(self):
        parser = argparse.ArgumentParser(
            description="""
-            Generate example: dream> a fantastic alien landscape -W576 -H512 -s60 -n4
+            *Image generation:*
-            Postprocess example: dream> !pp 0000045.4829112.png -G1 -U4 -ft codeformer
+            To generate images, type a text prompt with optional switches. Example:
                 a fantastic alien landscape -W576 -H512 -s60 -n4
            *postprocessing*
            To post-process a previously-generated image, use the "!fix" command, and
            provide the image filename and postprocessing options. You may provide either the filename,
            in which case the script will look in the current output directory, or an arbitrary absolute or
            relative path to the desired PNG file.
                   -G (strength)        - apply face-fixing, e.g. -G0.8
                   -U (scaleg)          - upscale to the desired dimensions with ersgan, e.g. -U2
                   --embiggen (scale)   - upscale using the embiggen algorithm
                   -ft (algorithm)      - select which face-fixing algorithm to use (gfpgan|codeformer)
            Example: !fix 0000045.4829112.png -G1 -U4 -ft codeformer
            *History manipulation*
            Use !fetch to retrieve the image generation parameters used to generate a previously-generated
            image. The original command will be inserted onto the command line for editing (Linux, Mac), or
            printed as a comment above the dream> prompt (Windows). If a bare filename is provided, the script
            will look in the current output directory
            Example: dream> !fetch 0000015.8929913.png
                     dream> a fantastic alien landscape -W 576 -H 512 -s 60 -A plms -C 7.5
            Use !history to get a numbered list of the past 1000 commands (Linux, Mac) or the commands issued
            during the current session (Windows).
            Use !NN to retrieve the NNth command from the history list and load it into the command line
            for editing and re-issuing.
            """
        )
        render_group     = parser.add_argument_group('General rendering')
--- a/ldm/dream/log.py
+++ b/ldm/dream/log.py
@ -23,11 +23,14 @@ def write_log(results, log_path, file_types, output_cntr):
 def write_log_message(results, output_cntr):
     """Log each (path, prompt) result to the terminal.

     A single result is labelled ``[N]``; several results produced by one
     command are labelled ``[N.1]``, ``[N.2]``, ... so that they all share
     the same counter value N.

     Args:
         results: iterable of (path, prompt) pairs.
         output_cntr: counter value used to label this batch of output.

     Returns:
         ``output_cntr + 1``, the label for the next batch. The counter
         advances once per call, even when ``results`` is empty.
     """
     log_lines = [f"{path}: {prompt}\n" for path, prompt in results]
     if len(log_lines) > 1:
         for subcntr, line in enumerate(log_lines, start=1):
             print(f"[{output_cntr}.{subcntr}] {line}", end="")
     elif log_lines:
         # Only index the first entry when there is one: an empty result
         # list would otherwise raise IndexError on log_lines[0].
         print(f"[{output_cntr}] {log_lines[0]}", end="")
     return output_cntr + 1
 def write_log_files(results, log_path, file_types):
    for file_type in file_types:
--- a/ldm/dream/readline.py
+++ b/ldm/dream/readline.py
@ -21,6 +21,10 @@ try:
 except:
    readline_available = False
 # To simulate what happens on Windows systems, uncomment
 # the following line
 #readline_available = False
 IMG_EXTENSIONS     = ('.png','.jpg','.jpeg')
 COMMANDS = (
    '--steps','-s',
@ -101,12 +105,14 @@ class Completer:
            response = None
        return response
-    def add_to_history(self,line):
+    def add_history(self,line):
        '''
-        This is a no-op; readline handles this automatically. But we provide it
+        Pass thru to readline
        for DummyReadline compatibility.
        '''
-        pass
+        readline.add_history(line)
    def remove_history_item(self, pos):
        '''
        Delete the history entry at position `pos` by delegating
        to readline.remove_history_item().
        '''
        readline.remove_history_item(pos)
    def add_seed(self, seed):
        '''
@ -226,7 +232,7 @@ class DummyCompleter(Completer):
        super().__init__(options)
        self.history = list()
-    def add_to_history(self,line):
+    def add_history(self,line):
        self.history.append(line)
    def get_current_history_length(self):
@ -235,6 +241,9 @@ class DummyCompleter(Completer):
    def get_history_item(self, index):
        '''
        Return the stored history line for a 1-based index
        (index 1 is the first line that was appended).
        '''
        position = index - 1
        return self.history[position]
    def remove_history_item(self, index):
        '''
        Remove the history line at the given 1-based index from
        the in-memory list and return the removed line.
        '''
        position = index - 1
        removed = self.history.pop(position)
        return removed
    def set_line(self, line):
        '''
        Echo `line` to stdout as a comment, prefixed with '# '.
        '''
        commented = f'# {line}'
        print(commented)
@ -244,6 +253,7 @@ if readline_available:
    readline.set_completer(
        completer.complete
    )
    readline.set_auto_history(False)
    readline.set_pre_input_hook(completer._pre_input_hook)
    readline.set_completer_delims(' ')
    readline.parse_and_bind('tab: complete')
--- a/ldm/generate.py
+++ b/ldm/generate.py
@ -490,25 +490,26 @@ class Generate:
            opt                 = None,
            ):
        # retrieve the seed from the image;
        # note that we will try both the new way and the old way, since not all files have the
        # metadata (yet)
        seed   = None
        image_metadata = None
        prompt = None
-        try:
+
        args   = metadata_from_png(image_path)
        seed   = args.seed
        prompt = args.prompt
        print(f'>> retrieved seed {seed} and prompt "{prompt}" from {image_path}')
        except:
            m    = re.search('(\d+)\.png$',image_path)
            if m:
                seed = m.group(1)
        if not seed:
            print('* Could not recover seed for image. Replacing with 42. This will not affect image quality')
            seed = 42
        # try to reuse the same filename prefix as the original file.
        # note that this is hacky
        prefix = None
        m    = re.search('(\d+)\.',os.path.basename(image_path))
        if m:
            prefix = m.groups()[0]
        # face fixers and esrgan take an Image, but embiggen takes a path
        image = Image.open(image_path)
@ -530,6 +531,7 @@ class Generate:
                save_original = save_original,
                upscale = upscale,
                image_callback = callback,
                prefix = prefix,
            )
        elif tool == 'embiggen':
@ -716,7 +718,9 @@ class Generate:
                                strength      =  0.0,
                                codeformer_fidelity = 0.75,
                                save_original = False,
-                                image_callback = None):
+                                image_callback = None,
                                prefix = None,
    ):
        for r in image_list:
            image, seed = r
@ -750,7 +754,7 @@ class Generate:
                )
            if image_callback is not None:
-                image_callback(image, seed, upscaled=True)
+                image_callback(image, seed, upscaled=True, use_prefix=prefix)
            else:
                r[0] = image
--- a/scripts/dream.py
+++ b/scripts/dream.py
@ -15,13 +15,11 @@ from ldm.dream.pngwriter import PngWriter
 from ldm.dream.image_util import make_grid
 from ldm.dream.log import write_log
 from omegaconf import OmegaConf
 from backend.invoke_ai_web_server import InvokeAIWebServer
-# Placeholder to be replaced with proper class that tracks the
+# The output counter labels each output and is keyed to the
-# outputs and associates with the prompt that generated them.
+# command-line history
-# Just want to get the formatting look right for now.
+output_cntr = completer.get_current_history_length()+1
 output_cntr = 0
 def main():
    """Initialize command-line parsers and the diffusion model"""
@ -260,17 +258,21 @@ def main_loop(gen, opt, infile):
        last_results = []
        try:
            file_writer      = PngWriter(current_outdir)
            prefix           = file_writer.unique_prefix()
            results          = []  # list of filename, prompt pairs
            grid_images      = dict()  # seed -> Image, only used if `opt.grid`
            prior_variations = opt.with_variations or []
-            def image_writer(image, seed, upscaled=False, first_seed=None):
+            def image_writer(image, seed, upscaled=False, first_seed=None, use_prefix=None):
                # note the seed is the seed of the current image
                # the first_seed is the original seed that noise is added to
                # when the -v switch is used to generate variations
                path = None
                nonlocal prior_variations
                if use_prefix is not None:
                    prefix = use_prefix
                else:
                    prefix           = file_writer.unique_prefix()
                path = None
                if opt.grid:
                    grid_images[seed] = image
                else:
@ -349,7 +351,10 @@ def main_loop(gen, opt, infile):
        global output_cntr
        output_cntr = write_log(results, log_path ,('txt', 'md'), output_cntr)
        print()
-        completer.add_to_history(command)
+        if operation == 'postprocess':
            completer.add_history(f'!fix {command}')
        else:
            completer.add_history(command)
    print('goodbye!')
@ -373,7 +378,7 @@ def do_postprocess (gen, opt, callback):
    opt.save_original = True # do not overwrite old image!
    opt.last_operation    = f'postprocess:{tool}'
    gen.apply_postprocessor(
-        image_path      = opt.prompt,
+        image_path      = file_path,
        tool            = tool,
        gfpgan_strength = opt.gfpgan_strength,
        codeformer_fidelity = opt.codeformer_fidelity,
@ -424,7 +429,7 @@ def choose_postprocess_name(opt,prefix,seed) -> str:
    filename  = None
    available = False
    while not available:
-        if counter > 0:
+        if counter == 0:
            filename = f'{prefix}.{seed}.{modifier}.png'
        else:
            filename = f'{prefix}.{seed}.{modifier}-{counter:02d}.png'
@ -500,16 +505,5 @@ def retrieve_dream_command(opt,file_path):
    cmd = dream_cmd_from_png(path)
    completer.set_line(cmd)
 def write_log_message(results, log_path):
     """Print each result to the terminal and append it to the log file.

     Every (path, prompt) pair in `results` is formatted as "path: prompt".
     Each line is printed with a running "[N]" label taken from the
     module-level `output_cntr`, which is incremented once per line. The
     unlabelled lines are then appended to `log_path` as UTF-8 text.
     """
     global output_cntr
     formatted = [f'{path}: {prompt}\n' for path, prompt in results]
     for entry in formatted:
         output_cntr += 1
         print(f'[{output_cntr}] {entry}', end='')
     with open(log_path, 'a', encoding='utf-8') as log_file:
         log_file.writelines(formatted)
 if __name__ == '__main__':
    main()