add documentation and bug fixes

- normalized how filenames are written out when postprocessing is invoked - various fixes of bugs encountered during testing - updated documentation - updated help text
2024-08-30 20:32:17 +00:00 · 2022-09-28 11:48:11 -04:00 · 2022-09-28 11:48:11 -04:00 · dff4850a82
commit dff4850a82
parent 800f9615c2
6 changed files with 166 additions and 47 deletions
--- a/docs/features/CLI.md
+++ b/docs/features/CLI.md
@ -205,6 +205,85 @@ well as the --mask (-M) argument:
 | --init_mask <path> | -M<path>   | None                |Path to an image the same size as the initial_image, with areas for inpainting made transparent.|


+# Convenience commands
+
+In addition to the standard image generation arguments, there is a
+series of convenience commands that begin with !:
+
+## !fix
+
+This command runs a post-processor on a previously-generated image. It
+takes a PNG filename or path and applies your choice of the -U, -G, or
+--embiggen switches in order to fix faces or upscale. If you provide a
+filename, the script will look for it in the current output
+directory. Otherwise you can provide a full or partial path to the
+desired file.
+
+Some examples:
+
+Upscale to 4X its original size and fix faces using codeformer:
+~~~
+dream> !fix 0000045.4829112.png -G1 -U4 -ft codeformer
+~~~
+
+Use the GFPGAN algorithm to fix faces, then upscale to 3X using --embiggen:
+
+~~~
+dream> !fix 0000045.4829112.png -G0.8 -ft gfpgan
+>> fixing outputs/img-samples/0000045.4829112.png
+>> retrieved seed 4829112 and prompt "boy enjoying a banana split"
+>> GFPGAN - Restoring Faces for image seed:4829112
+Outputs:
+[1] outputs/img-samples/000017.4829112.gfpgan-00.png: !fix "outputs/img-samples/0000045.4829112.png" -s 50 -S  -W 512 -H 512 -C 7.5 -A k_lms -G 0.8
+
+dream> !fix 000017.4829112.gfpgan-00.png --embiggen 3
+...lots of text...
+Outputs:
+[2] outputs/img-samples/000018.2273800735.embiggen-00.png: !fix "outputs/img-samples/000017.4829112.gfpgan-00.png" -s 50 -S 2273800735 -W 512 -H 512 -C 7.5 -A k_lms --embiggen 3.0 0.75 0.25
+~~~
+
+## !fetch
+
+This command retrieves the generation parameters from a previously
+generated image and either loads them into the command line
+(Linux|Mac), or prints them out in a comment for copy-and-paste
+(Windows). You may provide either the name of a file in the current
+output directory, or a full file path.
+
+~~~
+dream> !fetch 0000015.8929913.png
+# the script returns the next line, ready for editing and running:
+dream> a fantastic alien landscape -W 576 -H 512 -s 60 -A plms -C 7.5
+~~~
+
+Note that this command may behave unexpectedly if given a PNG file that
+was not generated by InvokeAI.
+
+## !history
+
+The dream script keeps track of all the commands you issue during a
+session, allowing you to re-run them. On Mac and Linux systems, it
+also writes the command-line history out to disk, giving you access to
+the most recent 1000 commands issued.
+
+The `!history` command will return a numbered list of all the commands
+issued during the session (Windows), or the most recent 1000 commands
+(Mac|Linux). You can then repeat a command by using the command !NNN,
+where "NNN" is the history line number. For example:
+
+~~~
+dream> !history
+...
+[14] happy woman sitting under tree wearing broad hat and flowing garment
+[15] beautiful woman sitting under tree wearing broad hat and flowing garment
+[18] beautiful woman sitting under tree wearing broad hat and flowing garment -v0.2 -n6
+[20] watercolor of beautiful woman sitting under tree wearing broad hat and flowing garment -v0.2 -n6 -S2878767194
+[21] surrealist painting of beautiful woman sitting under tree wearing broad hat and flowing garment -v0.2 -n6 -S2878767194
+...
+dream> !20
+dream> watercolor of beautiful woman sitting under tree wearing broad hat and flowing garment -v0.2 -n6 -S2878767194
+~~~
+
 # Command-line editing and completion

 If you are on a Macintosh or Linux machine, the command-line offers
--- a/ldm/dream/args.py
+++ b/ldm/dream/args.py
@ -446,8 +446,36 @@ class Args(object):
    def _create_dream_cmd_parser(self):
        parser = argparse.ArgumentParser(
            description="""
-            Generate example: dream> a fantastic alien landscape -W576 -H512 -s60 -n4
-            Postprocess example: dream> !pp 0000045.4829112.png -G1 -U4 -ft codeformer
+            *Image generation:*
+            To generate images, type a text prompt with optional switches. Example:
+                 a fantastic alien landscape -W576 -H512 -s60 -n4
+
+            *postprocessing*
+            To post-process a previously-generated image, use the "!fix" command, and
+            provide the image filename and postprocessing options. You may provide either the filename,
+            in which case the script will look in the current output directory, or an arbitrary absolute or
+            relative path to the desired PNG file.
+                   -G (strength)        - apply face-fixing, e.g. -G0.8
+                   -U (scale)           - upscale to the desired dimensions with esrgan, e.g. -U2
+                   --embiggen (scale)   - upscale using the embiggen algorithm
+                   -ft (algorithm)      - select which face-fixing algorithm to use (gfpgan|codeformer)
+
+            Example: !fix 0000045.4829112.png -G1 -U4 -ft codeformer
+
+            *History manipulation*
+            Use !fetch to retrieve the image generation parameters used to generate a previously-generated
+            image. The original command will be inserted onto the command line for editing (Linux, Mac), or
+            printed as a comment above the dream> prompt (Windows). If a bare filename is provided, the script
+            will look in the current output directory.
+
+            Example: dream> !fetch 0000015.8929913.png
+                     dream> a fantastic alien landscape -W 576 -H 512 -s 60 -A plms -C 7.5
+
+            Use !history to get a numbered list of the past 1000 commands (Linux, Mac) or the commands issued
+            during the current session (Windows).
+
+            Use !NN to retrieve the NNth command from the history list and load it into the command line
+            for editing and re-issuing.
            """
        )
        render_group     = parser.add_argument_group('General rendering')
--- a/ldm/dream/log.py
+++ b/ldm/dream/log.py
@ -23,11 +23,14 @@ def write_log(results, log_path, file_types, output_cntr):
 def write_log_message(results, output_cntr):
    """logs to the terminal"""
    log_lines = [f"{path}: {prompt}\n" for path, prompt in results]
-    for l in log_lines:
-        output_cntr += 1
-        print(f"[{output_cntr}] {l}", end="")
-    return output_cntr
-
+    if len(log_lines)>1:
+        subcntr = 1
+        for l in log_lines:
+           print(f"[{output_cntr}.{subcntr}] {l}", end="")
+           subcntr += 1
+    else:
+           print(f"[{output_cntr}] {log_lines[0]}", end="")
+    return output_cntr+1

 def write_log_files(results, log_path, file_types):
    for file_type in file_types:
--- a/ldm/dream/readline.py
+++ b/ldm/dream/readline.py
@ -21,6 +21,10 @@ try:
 except:
    readline_available = False

+# To simulate what happens on Windows systems, uncomment
+# the following line:
+#readline_available = False
+
 IMG_EXTENSIONS     = ('.png','.jpg','.jpeg')
 COMMANDS = (
    '--steps','-s',
@ -101,12 +105,14 @@ class Completer:
            response = None
        return response

-    def add_to_history(self,line):
+    def add_history(self,line):
        '''
-        This is a no-op; readline handles this automatically. But we provide it
-        for DummyReadline compatibility.
+        Pass thru to readline
        '''
-        pass
+        readline.add_history(line)
+
+    def remove_history_item(self,pos):
+        readline.remove_history_item(pos)

    def add_seed(self, seed):
        '''
@ -226,7 +232,7 @@ class DummyCompleter(Completer):
        super().__init__(options)
        self.history = list()
        
-    def add_to_history(self,line):
+    def add_history(self,line):
        self.history.append(line)

    def get_current_history_length(self):
@ -235,6 +241,9 @@ class DummyCompleter(Completer):
    def get_history_item(self,index):
        return self.history[index-1]

+    def remove_history_item(self,index):
+        return self.history.pop(index-1)
+
    def set_line(self,line):
        print(f'# {line}')

@ -244,6 +253,7 @@ if readline_available:
    readline.set_completer(
        completer.complete
    )
+    readline.set_auto_history(False)
    readline.set_pre_input_hook(completer._pre_input_hook)
    readline.set_completer_delims(' ')
    readline.parse_and_bind('tab: complete')
--- a/ldm/generate.py
+++ b/ldm/generate.py
@ -490,25 +490,26 @@ class Generate:
            opt                 = None,
            ):
        # retrieve the seed from the image;
-        # note that we will try both the new way and the old way, since not all files have the
-        # metadata (yet)
        seed   = None
        image_metadata = None
        prompt = None
-        try:
-            args = metadata_from_png(image_path)
-            seed   = args.seed
-            prompt = args.prompt
-            print(f'>> retrieved seed {seed} and prompt "{prompt}" from {image_path}')
-        except:
-            m    = re.search('(\d+)\.png$',image_path)
-            if m:
-                seed = m.group(1)
+
+        args   = metadata_from_png(image_path)
+        seed   = args.seed
+        prompt = args.prompt
+        print(f'>> retrieved seed {seed} and prompt "{prompt}" from {image_path}')

        if not seed:
            print('* Could not recover seed for image. Replacing with 42. This will not affect image quality')
            seed = 42
-        
+
+        # try to reuse the same filename prefix as the original file.
+        # note that this is hacky
+        prefix = None
+        m    = re.search('(\d+)\.',os.path.basename(image_path))
+        if m:
+            prefix = m.groups()[0]
+
        # face fixers and esrgan take an Image, but embiggen takes a path
        image = Image.open(image_path)

@ -530,6 +531,7 @@ class Generate:
                save_original = save_original,
                upscale = upscale,
                image_callback = callback,
+                prefix = prefix,
            )

        elif tool == 'embiggen':
@ -716,7 +718,9 @@ class Generate:
                                strength      =  0.0,
                                codeformer_fidelity = 0.75,
                                save_original = False,
-                                image_callback = None):
+                                image_callback = None,
+                                prefix = None,
+    ):
            
        for r in image_list:
            image, seed = r
@ -750,7 +754,7 @@ class Generate:
                )

            if image_callback is not None:
-                image_callback(image, seed, upscaled=True)
+                image_callback(image, seed, upscaled=True, use_prefix=prefix)
            else:
                r[0] = image

--- a/scripts/dream.py
+++ b/scripts/dream.py
@ -17,10 +17,9 @@ from ldm.dream.image_util import make_grid
 from ldm.dream.log import write_log
 from omegaconf import OmegaConf

-# Placeholder to be replaced with proper class that tracks the
-# outputs and associates with the prompt that generated them.
-# Just want to get the formatting look right for now.
-output_cntr = 0
+# The output counter labels each output and is keyed to the
+# command-line history
+output_cntr = completer.get_current_history_length()+1

 def main():
    """Initialize command-line parsers and the diffusion model"""
@ -259,17 +258,21 @@ def main_loop(gen, opt, infile):
        last_results = []
        try:
            file_writer      = PngWriter(current_outdir)
-            prefix           = file_writer.unique_prefix()
            results          = []  # list of filename, prompt pairs
            grid_images      = dict()  # seed -> Image, only used if `opt.grid`
            prior_variations = opt.with_variations or []

-            def image_writer(image, seed, upscaled=False, first_seed=None):
+            def image_writer(image, seed, upscaled=False, first_seed=None, use_prefix=None):
                # note the seed is the seed of the current image
                # the first_seed is the original seed that noise is added to
                # when the -v switch is used to generate variations
-                path = None
                nonlocal prior_variations
+                if use_prefix is not None:
+                    prefix = use_prefix
+                else:
+                    prefix           = file_writer.unique_prefix()
+
+                path = None
                if opt.grid:
                    grid_images[seed] = image
                else:
@ -348,7 +351,10 @@ def main_loop(gen, opt, infile):
        global output_cntr
        output_cntr = write_log(results, log_path ,('txt', 'md'), output_cntr)
        print()
-        completer.add_to_history(command)
+        if operation == 'postprocess':
+            completer.add_history(f'!fix {command}')
+        else:
+            completer.add_history(command)

    print('goodbye!')

@ -372,7 +378,7 @@ def do_postprocess (gen, opt, callback):
    opt.save_original = True # do not overwrite old image!
    opt.last_operation    = f'postprocess:{tool}'
    gen.apply_postprocessor(
-        image_path      = opt.prompt,
+        image_path      = file_path,
        tool            = tool,
        gfpgan_strength = opt.gfpgan_strength,
        codeformer_fidelity = opt.codeformer_fidelity,
@ -423,7 +429,7 @@ def choose_postprocess_name(opt,prefix,seed) -> str:
    filename  = None
    available = False
    while not available:
-        if counter > 0:
+        if counter == 0:
            filename = f'{prefix}.{seed}.{modifier}.png'
        else:
            filename = f'{prefix}.{seed}.{modifier}-{counter:02d}.png'
@ -514,16 +520,5 @@ def retrieve_dream_command(opt,file_path):
    cmd = dream_cmd_from_png(path)
    completer.set_line(cmd)

-def write_log_message(results, log_path):
-    """logs the name of the output image, prompt, and prompt args to the terminal and log file"""
-    global output_cntr
-    log_lines = [f'{path}: {prompt}\n' for path, prompt in results]
-    for l in log_lines:
-        output_cntr += 1
-        print(f'[{output_cntr}] {l}',end='')
-
-    with open(log_path, 'a', encoding='utf-8') as file:
-        file.writelines(log_lines)
-
 if __name__ == '__main__':
    main()