add documentation and bug fixes

- normalized how filenames are written out when postprocessing invoked
- various fixes of bugs encountered during testing
- updated documentation
- updated help text
This commit is contained in:
Lincoln Stein 2022-09-28 11:48:11 -04:00
parent 800f9615c2
commit dff4850a82
6 changed files with 166 additions and 47 deletions

View File

@ -205,6 +205,85 @@ well as the --mask (-M) argument:
| --init_mask <path> | -M<path> | None |Path to an image the same size as the initial_image, with areas for inpainting made transparent.|
# Convenience commands
In addition to the standard image generation arguments, there are a
series of convenience commands that begin with !:
## !fix
This command runs a post-processor on a previously-generated image. It
takes a PNG filename or path and applies your choice of the -U, -G, or
--embiggen switches in order to fix faces or upscale. If you provide a
filename, the script will look for it in the current output
directory. Otherwise you can provide a full or partial path to the
desired file.
Some examples:
Upscale to 4X its original size and fix faces using codeformer:
~~~
dream> !fix 0000045.4829112.png -G1 -U4 -ft codeformer
~~~
Use the GFPGAN algorithm to fix faces, then upscale to 3X using --embiggen:
~~~
dream> !fix 0000045.4829112.png -G0.8 -ft gfpgan
>> fixing outputs/img-samples/0000045.4829112.png
>> retrieved seed 4829112 and prompt "boy enjoying a banana split"
>> GFPGAN - Restoring Faces for image seed:4829112
Outputs:
[1] outputs/img-samples/000017.4829112.gfpgan-00.png: !fix "outputs/img-samples/0000045.4829112.png" -s 50 -S 4829112 -W 512 -H 512 -C 7.5 -A k_lms -G 0.8
dream> !fix 000017.4829112.gfpgan-00.png --embiggen 3
...lots of text...
Outputs:
[2] outputs/img-samples/000018.2273800735.embiggen-00.png: !fix "outputs/img-samples/000017.4829112.gfpgan-00.png" -s 50 -S 2273800735 -W 512 -H 512 -C 7.5 -A k_lms --embiggen 3.0 0.75 0.25
~~~
## !fetch
This command retrieves the generation parameters from a previously
generated image and either loads them into the command line
(Linux|Mac), or prints them out in a comment for copy-and-paste
(Windows). You may provide either the name of a file in the current
output directory, or a full file path.
~~~
dream> !fetch 0000015.8929913.png
# the script returns the next line, ready for editing and running:
dream> a fantastic alien landscape -W 576 -H 512 -s 60 -A plms -C 7.5
~~~
Note that this command may behave unexpectedly if given a PNG file that
was not generated by InvokeAI.
## !history
The dream script keeps track of all the commands you issue during a
session, allowing you to re-run them. On Mac and Linux systems, it
also writes the command-line history out to disk, giving you access to
the most recent 1000 commands issued.
The `!history` command will return a numbered list of all the commands
issued during the session (Windows), or the most recent 1000 commands
(Mac|Linux). You can then recall a command by typing !NNN, where
"NNN" is the history line number; the command is loaded into the
command line for editing and re-issuing. For example:
~~~
dream> !history
...
[14] happy woman sitting under tree wearing broad hat and flowing garment
[15] beautiful woman sitting under tree wearing broad hat and flowing garment
[18] beautiful woman sitting under tree wearing broad hat and flowing garment -v0.2 -n6
[20] watercolor of beautiful woman sitting under tree wearing broad hat and flowing garment -v0.2 -n6 -S2878767194
[21] surrealist painting of beautiful woman sitting under tree wearing broad hat and flowing garment -v0.2 -n6 -S2878767194
...
dream> !20
dream> watercolor of beautiful woman sitting under tree wearing broad hat and flowing garment -v0.2 -n6 -S2878767194
~~~
# Command-line editing and completion
If you are on a Macintosh or Linux machine, the command-line offers

View File

@ -446,8 +446,36 @@ class Args(object):
def _create_dream_cmd_parser(self):
parser = argparse.ArgumentParser(
description="""
Generate example: dream> a fantastic alien landscape -W576 -H512 -s60 -n4
Postprocess example: dream> !pp 0000045.4829112.png -G1 -U4 -ft codeformer
*Image generation:*
To generate images, type a text prompt with optional switches. Example:
a fantastic alien landscape -W576 -H512 -s60 -n4
*postprocessing*
To post-process a previously-generated image, use the "!fix" command, and
provide the image filename and postprocessing options. You may provide either the filename,
in which case the script will look in the current output directory, or an arbitrary absolute or
relative path to the desired PNG file.
-G (strength) - apply face-fixing, e.g. -G0.8
-U (scale) - upscale to the desired dimensions with esrgan, e.g. -U2
--embiggen (scale) - upscale using the embiggen algorithm
-ft (algorithm) - select which face-fixing algorithm to use (gfpgan|codeformer)
Example: !fix 0000045.4829112.png -G1 -U4 -ft codeformer
*History manipulation*
Use !fetch to retrieve the image generation parameters used to generate a previously-generated
image. The original command will be inserted onto the command line for editing (Linux, Mac), or
printed as a comment above the dream> prompt (Windows). If a bare filename is provided, the script
will look in the current output directory.
Example: dream> !fetch 0000015.8929913.png
dream> a fantastic alien landscape -W 576 -H 512 -s 60 -A plms -C 7.5
Use !history to get a numbered list of the past 1000 commands (Linux, Mac) or the commands issued
during the current session (Windows).
Use !NN to retrieve the NNth command from the history list and load it into the command line
for editing and re-issuing.
"""
)
render_group = parser.add_argument_group('General rendering')

View File

@ -23,11 +23,14 @@ def write_log(results, log_path, file_types, output_cntr):
def write_log_message(results, output_cntr):
"""logs to the terminal"""
log_lines = [f"{path}: {prompt}\n" for path, prompt in results]
for l in log_lines:
output_cntr += 1
print(f"[{output_cntr}] {l}", end="")
return output_cntr
if len(log_lines)>1:
subcntr = 1
for l in log_lines:
print(f"[{output_cntr}.{subcntr}] {l}", end="")
subcntr += 1
else:
print(f"[{output_cntr}] {log_lines[0]}", end="")
return output_cntr+1
def write_log_files(results, log_path, file_types):
for file_type in file_types:

View File

@ -21,6 +21,10 @@ try:
except:
readline_available = False
#to simulate what happens on windows systems, uncomment
# this line
#readline_available = False
IMG_EXTENSIONS = ('.png','.jpg','.jpeg')
COMMANDS = (
'--steps','-s',
@ -101,12 +105,14 @@ class Completer:
response = None
return response
def add_to_history(self,line):
def add_history(self,line):
'''
This is a no-op; readline handles this automatically. But we provide it
for DummyReadline compatibility.
Pass thru to readline
'''
pass
readline.add_history(line)
def remove_history_item(self,pos):
readline.remove_history_item(pos)
def add_seed(self, seed):
'''
@ -226,7 +232,7 @@ class DummyCompleter(Completer):
super().__init__(options)
self.history = list()
def add_to_history(self,line):
def add_history(self,line):
self.history.append(line)
def get_current_history_length(self):
@ -235,6 +241,9 @@ class DummyCompleter(Completer):
def get_history_item(self,index):
return self.history[index-1]
def remove_history_item(self,index):
return self.history.pop(index-1)
def set_line(self,line):
print(f'# {line}')
@ -244,6 +253,7 @@ if readline_available:
readline.set_completer(
completer.complete
)
readline.set_auto_history(False)
readline.set_pre_input_hook(completer._pre_input_hook)
readline.set_completer_delims(' ')
readline.parse_and_bind('tab: complete')

View File

@ -490,25 +490,26 @@ class Generate:
opt = None,
):
# retrieve the seed from the image;
# note that we will try both the new way and the old way, since not all files have the
# metadata (yet)
seed = None
image_metadata = None
prompt = None
try:
args = metadata_from_png(image_path)
seed = args.seed
prompt = args.prompt
print(f'>> retrieved seed {seed} and prompt "{prompt}" from {image_path}')
except:
m = re.search('(\d+)\.png$',image_path)
if m:
seed = m.group(1)
args = metadata_from_png(image_path)
seed = args.seed
prompt = args.prompt
print(f'>> retrieved seed {seed} and prompt "{prompt}" from {image_path}')
if not seed:
print('* Could not recover seed for image. Replacing with 42. This will not affect image quality')
seed = 42
# try to reuse the same filename prefix as the original file.
# note that this is hacky
prefix = None
m = re.search('(\d+)\.',os.path.basename(image_path))
if m:
prefix = m.groups()[0]
# face fixers and esrgan take an Image, but embiggen takes a path
image = Image.open(image_path)
@ -530,6 +531,7 @@ class Generate:
save_original = save_original,
upscale = upscale,
image_callback = callback,
prefix = prefix,
)
elif tool == 'embiggen':
@ -716,7 +718,9 @@ class Generate:
strength = 0.0,
codeformer_fidelity = 0.75,
save_original = False,
image_callback = None):
image_callback = None,
prefix = None,
):
for r in image_list:
image, seed = r
@ -750,7 +754,7 @@ class Generate:
)
if image_callback is not None:
image_callback(image, seed, upscaled=True)
image_callback(image, seed, upscaled=True, use_prefix=prefix)
else:
r[0] = image

View File

@ -17,10 +17,9 @@ from ldm.dream.image_util import make_grid
from ldm.dream.log import write_log
from omegaconf import OmegaConf
# Placeholder to be replaced with proper class that tracks the
# outputs and associates with the prompt that generated them.
# Just want to get the formatting look right for now.
output_cntr = 0
# The output counter labels each output and is keyed to the
# command-line history
output_cntr = completer.get_current_history_length()+1
def main():
"""Initialize command-line parsers and the diffusion model"""
@ -259,17 +258,21 @@ def main_loop(gen, opt, infile):
last_results = []
try:
file_writer = PngWriter(current_outdir)
prefix = file_writer.unique_prefix()
results = [] # list of filename, prompt pairs
grid_images = dict() # seed -> Image, only used if `opt.grid`
prior_variations = opt.with_variations or []
def image_writer(image, seed, upscaled=False, first_seed=None):
def image_writer(image, seed, upscaled=False, first_seed=None, use_prefix=None):
# note the seed is the seed of the current image
# the first_seed is the original seed that noise is added to
# when the -v switch is used to generate variations
path = None
nonlocal prior_variations
if use_prefix is not None:
prefix = use_prefix
else:
prefix = file_writer.unique_prefix()
path = None
if opt.grid:
grid_images[seed] = image
else:
@ -348,7 +351,10 @@ def main_loop(gen, opt, infile):
global output_cntr
output_cntr = write_log(results, log_path ,('txt', 'md'), output_cntr)
print()
completer.add_to_history(command)
if operation == 'postprocess':
completer.add_history(f'!fix {command}')
else:
completer.add_history(command)
print('goodbye!')
@ -372,7 +378,7 @@ def do_postprocess (gen, opt, callback):
opt.save_original = True # do not overwrite old image!
opt.last_operation = f'postprocess:{tool}'
gen.apply_postprocessor(
image_path = opt.prompt,
image_path = file_path,
tool = tool,
gfpgan_strength = opt.gfpgan_strength,
codeformer_fidelity = opt.codeformer_fidelity,
@ -423,7 +429,7 @@ def choose_postprocess_name(opt,prefix,seed) -> str:
filename = None
available = False
while not available:
if counter > 0:
if counter == 0:
filename = f'{prefix}.{seed}.{modifier}.png'
else:
filename = f'{prefix}.{seed}.{modifier}-{counter:02d}.png'
@ -514,16 +520,5 @@ def retrieve_dream_command(opt,file_path):
cmd = dream_cmd_from_png(path)
completer.set_line(cmd)
def write_log_message(results, log_path):
"""logs the name of the output image, prompt, and prompt args to the terminal and log file"""
global output_cntr
log_lines = [f'{path}: {prompt}\n' for path, prompt in results]
for l in log_lines:
output_cntr += 1
print(f'[{output_cntr}] {l}',end='')
with open(log_path, 'a', encoding='utf-8') as file:
file.writelines(log_lines)
if __name__ == '__main__':
main()