fix location of textual_inversion script

This commit is contained in:
Lincoln Stein 2023-01-26 11:56:23 -05:00
parent 61403fe306
commit d3a469d136
6 changed files with 609 additions and 515 deletions

View File

@ -55,7 +55,7 @@ Please enter 1, 2, 3, or 4: [1] 3
From the command line, with the InvokeAI virtual environment active, From the command line, with the InvokeAI virtual environment active,
you can launch the front end with the command you can launch the front end with the command
`textual_inversion_fe`. `textual_inversion --gui`.
This will launch a text-based front end that will look like this: This will launch a text-based front end that will look like this:
@ -219,11 +219,9 @@ term. For example `a plate of banana sushi in <psychedelic> style`.
## **Training with the Command-Line Script** ## **Training with the Command-Line Script**
InvokeAI also comes with a traditional command-line script for Training can also be done using a traditional command-line script. It
launching textual inversion training. It is named can be launched from within the "developer's console", or from the
`textual_inversion`, and can be launched from within the command line after activating InvokeAI's virtual environment.
"developer's console", or from the command line after activating
InvokeAI's virtual environment.
It accepts a large number of arguments, which can be summarized by It accepts a large number of arguments, which can be summarized by
passing the `--help` argument: passing the `--help` argument:

414
ldm/invoke/textual_inversion.py Executable file
View File

@ -0,0 +1,414 @@
#!/usr/bin/env python
import argparse
import curses
import os
import re
import shutil
import sys
import traceback
from argparse import Namespace
from pathlib import Path
from typing import List
import npyscreen
from omegaconf import OmegaConf
from ldm.invoke.globals import Globals, global_set_root
from ldm.invoke.textual_inversion_training import (
do_textual_inversion_training,
parse_args,
)
# Sub-directory of the InvokeAI root used as the default location of training images.
TRAINING_DATA = "text-inversion-training-data"
# Sub-directory of the InvokeAI root used as the default training output location;
# the saved-preferences file is also written here.
TRAINING_DIR = "text-inversion-output"
# Name of the file (inside TRAINING_DIR) that stores the most recently used settings.
CONF_FILE = "preferences.conf"
class textualInversionForm(npyscreen.FormMultiPageAction):
    """Text-mode form that collects the settings for a textual inversion run.

    The widgets are pre-populated from ``saved_args`` (the settings of a
    previous run) where available. When the user confirms the form, the
    collected values are stored on ``parentApp.ti_arguments`` as a dict.
    """

    # Choices offered by the selection widgets. The widgets report the index
    # of the selected entry, so these lists are also used to map back to values.
    resolutions = [512, 768, 1024]
    lr_schedulers = [
        "linear",
        "cosine",
        "cosine_with_restarts",
        "polynomial",
        "constant",
        "constant_with_warmup",
    ]
    precisions = ["no", "fp16", "bf16"]
    learnable_properties = ["object", "style"]

    def __init__(self, parentApp, name, saved_args=None):
        """Remember the saved settings, then let npyscreen build the form.

        Args:
            parentApp: the owning npyscreen application.
            name: title of the form.
            saved_args: mapping of settings from a previous run, or None.
        """
        # Assigned before super().__init__() -- presumably because npyscreen
        # calls create() during construction, and create() reads saved_args.
        self.saved_args = saved_args or {}
        super().__init__(parentApp, name)

    def afterEditing(self):
        """Tell the application that no further form follows this one."""
        self.parentApp.setNextForm(None)

    @staticmethod
    def _choice_index(choices, wanted, fallback):
        """Return the index of ``wanted`` in ``choices``.

        Falls back to the index of ``fallback`` when ``wanted`` is not one of
        the choices (e.g. a stale or hand-edited preferences file), so that a
        bad saved value cannot prevent the form from opening.
        """
        try:
            return choices.index(wanted)
        except ValueError:
            return choices.index(fallback)

    def create(self):
        """Build every widget of the form, top to bottom."""
        self.model_names, default = self.get_model_names()
        default_initializer_token = ""
        default_placeholder_token = ""
        saved_args = self.saved_args

        # Prefer the model used last time, if it is still a known model.
        saved_model = saved_args.get("model")
        if saved_model in self.model_names:
            default = self.model_names.index(saved_model)

        self.add_widget_intelligent(
            npyscreen.FixedText,
            value="Use ctrl-N and ctrl-P to move to the <N>ext and <P>revious fields, cursor arrows to make a selection, and space to toggle checkboxes.",
        )

        self.model = self.add_widget_intelligent(
            npyscreen.TitleSelectOne,
            name="Model Name:",
            values=self.model_names,
            value=default,
            max_height=len(self.model_names) + 1,
        )

        # The trigger term deliberately starts out blank instead of being
        # restored from the saved settings.
        self.placeholder_token = self.add_widget_intelligent(
            npyscreen.TitleText,
            name="Trigger Term:",
            value="",
        )
        # Keep the dependent fields in sync whenever the trigger term is edited.
        self.placeholder_token.when_value_edited = self.initializer_changed

        # Shift the layout cursor so the hint text lands on the same row as
        # the trigger term, 30 columns to the right; then restore the column.
        self.nextrely -= 1
        self.nextrelx += 30
        self.prompt_token = self.add_widget_intelligent(
            npyscreen.FixedText,
            name="Trigger term for use in prompt",
            value="",
        )
        self.nextrelx -= 30

        self.initializer_token = self.add_widget_intelligent(
            npyscreen.TitleText,
            name="Initializer:",
            value=saved_args.get("initializer_token", default_initializer_token),
        )
        self.resume_from_checkpoint = self.add_widget_intelligent(
            npyscreen.Checkbox,
            name="Resume from last saved checkpoint",
            value=False,
        )
        self.learnable_property = self.add_widget_intelligent(
            npyscreen.TitleSelectOne,
            name="Learnable property:",
            values=self.learnable_properties,
            value=self._choice_index(
                self.learnable_properties,
                saved_args.get("learnable_property", "object"),
                "object",
            ),
            max_height=4,
        )
        self.train_data_dir = self.add_widget_intelligent(
            npyscreen.TitleFilename,
            name="Data Training Directory:",
            select_dir=True,
            must_exist=False,
            value=str(
                saved_args.get(
                    "train_data_dir",
                    Path(Globals.root) / TRAINING_DATA / default_placeholder_token,
                )
            ),
        )
        self.output_dir = self.add_widget_intelligent(
            npyscreen.TitleFilename,
            name="Output Destination Directory:",
            select_dir=True,
            must_exist=False,
            value=str(
                saved_args.get(
                    "output_dir",
                    Path(Globals.root) / TRAINING_DIR / default_placeholder_token,
                )
            ),
        )
        self.resolution = self.add_widget_intelligent(
            npyscreen.TitleSelectOne,
            name="Image resolution (pixels):",
            values=self.resolutions,
            value=self._choice_index(
                self.resolutions, saved_args.get("resolution", 512), 512
            ),
            scroll_exit=True,
            max_height=4,
        )
        self.center_crop = self.add_widget_intelligent(
            npyscreen.Checkbox,
            name="Center crop images before resizing to resolution",
            value=saved_args.get("center_crop", False),
        )
        self.mixed_precision = self.add_widget_intelligent(
            npyscreen.TitleSelectOne,
            name="Mixed Precision:",
            values=self.precisions,
            value=self._choice_index(
                self.precisions, saved_args.get("mixed_precision", "fp16"), "fp16"
            ),
            max_height=4,
        )
        self.num_train_epochs = self.add_widget_intelligent(
            npyscreen.TitleSlider,
            name="Number of training epochs:",
            out_of=1000,
            step=50,
            lowest=1,
            value=saved_args.get("num_train_epochs", 100),
        )
        self.max_train_steps = self.add_widget_intelligent(
            npyscreen.TitleSlider,
            name="Max Training Steps:",
            out_of=10000,
            step=500,
            lowest=1,
            value=saved_args.get("max_train_steps", 3000),
        )
        self.train_batch_size = self.add_widget_intelligent(
            npyscreen.TitleSlider,
            name="Batch Size (reduce if you run out of memory):",
            out_of=50,
            step=1,
            lowest=1,
            value=saved_args.get("train_batch_size", 8),
        )
        self.gradient_accumulation_steps = self.add_widget_intelligent(
            npyscreen.TitleSlider,
            name="Gradient Accumulation Steps (may need to decrease this to resume from a checkpoint):",
            out_of=10,
            step=1,
            lowest=1,
            value=saved_args.get("gradient_accumulation_steps", 4),
        )
        self.lr_warmup_steps = self.add_widget_intelligent(
            npyscreen.TitleSlider,
            name="Warmup Steps:",
            out_of=100,
            step=1,
            lowest=0,
            value=saved_args.get("lr_warmup_steps", 0),
        )
        self.learning_rate = self.add_widget_intelligent(
            npyscreen.TitleText,
            name="Learning Rate:",
            value=str(
                saved_args.get("learning_rate", "5.0e-04"),
            ),
        )
        self.scale_lr = self.add_widget_intelligent(
            npyscreen.Checkbox,
            name="Scale learning rate by number GPUs, steps and batch size",
            value=saved_args.get("scale_lr", True),
        )
        self.enable_xformers_memory_efficient_attention = self.add_widget_intelligent(
            npyscreen.Checkbox,
            name="Use xformers acceleration",
            value=saved_args.get("enable_xformers_memory_efficient_attention", False),
        )
        self.lr_scheduler = self.add_widget_intelligent(
            npyscreen.TitleSelectOne,
            name="Learning rate scheduler:",
            values=self.lr_schedulers,
            max_height=7,
            scroll_exit=True,
            value=self._choice_index(
                self.lr_schedulers,
                saved_args.get("lr_scheduler", "constant"),
                "constant",
            ),
        )

    def initializer_changed(self):
        """Refresh the fields derived from the trigger term.

        Installed as the ``when_value_edited`` hook of the trigger-term
        widget: updates the prompt hint, points both directory fields at a
        sub-directory named after the term, and pre-ticks "resume" when that
        output directory already exists.
        """
        placeholder = self.placeholder_token.value
        self.prompt_token.value = f"(Trigger by using <{placeholder}> in your prompts)"
        self.train_data_dir.value = str(
            Path(Globals.root) / TRAINING_DATA / placeholder
        )
        self.output_dir.value = str(Path(Globals.root) / TRAINING_DIR / placeholder)
        self.resume_from_checkpoint.value = Path(self.output_dir.value).exists()

    def on_ok(self):
        """Handle the OK button: validate, then hand the arguments to the app.

        If validation fails the form stays in editing mode so the user can
        correct the reported problems.
        """
        if self.validate_field_values():
            self.parentApp.setNextForm(None)
            self.editing = False
            self.parentApp.ti_arguments = self.marshall_arguments()
            npyscreen.notify(
                "Launching textual inversion training. This will take a while..."
            )
        else:
            self.editing = True

    def ok_cancel(self):
        """Handle the Cancel button by exiting the program with status 0."""
        sys.exit(0)

    def validate_field_values(self) -> bool:
        """Check the user's entries, reporting any problems in a dialog.

        Returns:
            True when every checked field is acceptable; False after showing
            a confirmation dialog that lists the problems found.
        """
        bad_fields = []
        # An empty selection may be reported as None or as an empty sequence;
        # an explicit membership test also keeps a bare index of 0 valid.
        if self.model.value in (None, [], ()):
            bad_fields.append(
                "Model Name must correspond to a known model in models.yaml"
            )
        # fullmatch (unlike match with "$") also rejects a trailing newline.
        if not re.fullmatch(r"[a-zA-Z0-9.-]+", self.placeholder_token.value or ""):
            bad_fields.append(
                "Trigger term must only contain alphanumeric characters, the dot and hyphen"
            )
        # Text widgets hold strings, so an unfilled field is "" rather than None.
        if not self.train_data_dir.value:
            bad_fields.append("Data Training Directory cannot be empty")
        if not self.output_dir.value:
            bad_fields.append("The Output Destination Directory cannot be empty")
        # marshall_arguments() converts this field with float(); catch bad
        # input here so it is reported instead of raising later.
        try:
            float(self.learning_rate.value)
        except (TypeError, ValueError):
            bad_fields.append("Learning Rate must be a number (for example 5.0e-04)")

        if bad_fields:
            message = "The following problems were detected and must be corrected:"
            for problem in bad_fields:
                message += f"\n* {problem}"
            npyscreen.notify_confirm(message)
            return False
        return True

    def get_model_names(self) -> "Tuple[List[str], Optional[int]]":
        """List the diffusers-format models defined in configs/models.yaml.

        Returns:
            A pair ``(model_names, default_index)``. ``model_names`` is the
            sorted list of entries whose ``format`` is ``"diffusers"``.
            ``default_index`` is the position of the first entry carrying a
            ``default`` key; if none does, the first model (index 0) is used,
            and if there are no diffusers models at all it is None.
        """
        conf = OmegaConf.load(os.path.join(Globals.root, "configs/models.yaml"))
        model_names = [
            key
            for key in sorted(conf.keys())
            if conf[key].get("format", None) == "diffusers"
        ]
        flagged = [
            position
            for position, key in enumerate(model_names)
            if "default" in conf[key]
        ]
        if flagged:
            return (model_names, flagged[0])
        # No model is flagged as default: avoid an IndexError and pick a
        # sensible starting selection instead.
        return (model_names, 0 if model_names else None)

    def marshall_arguments(self) -> dict:
        """Convert the widget values into a dict of training arguments.

        Expects validate_field_values() to have succeeded, since the
        selection widgets are indexed and the learning rate is parsed as a
        float without further checks.
        """
        args = dict()

        # the choices: widgets report the selected index, map it to the value
        args.update(
            model=self.model_names[self.model.value[0]],
            resolution=self.resolutions[self.resolution.value[0]],
            lr_scheduler=self.lr_schedulers[self.lr_scheduler.value[0]],
            mixed_precision=self.precisions[self.mixed_precision.value[0]],
            learnable_property=self.learnable_properties[
                self.learnable_property.value[0]
            ],
        )

        # all the strings and booleans
        for attr in (
            "initializer_token",
            "placeholder_token",
            "train_data_dir",
            "output_dir",
            "scale_lr",
            "center_crop",
            "enable_xformers_memory_efficient_attention",
        ):
            args[attr] = getattr(self, attr).value

        # all the integers
        for attr in (
            "train_batch_size",
            "gradient_accumulation_steps",
            "num_train_epochs",
            "max_train_steps",
            "lr_warmup_steps",
        ):
            args[attr] = int(getattr(self, attr).value)

        # the floats (just one)
        args.update(learning_rate=float(self.learning_rate.value))

        # a special case: only request a resume when there is something to resume from
        if self.resume_from_checkpoint.value and Path(self.output_dir.value).exists():
            args["resume_from_checkpoint"] = "latest"

        return args
class MyApplication(npyscreen.NPSAppManaged):
    """npyscreen application that hosts the textual inversion settings form."""

    def __init__(self, saved_args=None):
        """Create the application.

        Args:
            saved_args: settings from a previous run, forwarded to the form.
        """
        super().__init__()
        # Forwarded to the form in onStart().
        self.saved_args = saved_args
        # Remains None until the form's on_ok handler stores the collected
        # training arguments here.
        self.ti_arguments = None

    def onStart(self):
        """Apply the default theme and register the settings form as MAIN."""
        npyscreen.setTheme(npyscreen.Themes.DefaultTheme)
        form_options = dict(
            name="Textual Inversion Settings",
            saved_args=self.saved_args,
        )
        self.main = self.addForm("MAIN", textualInversionForm, **form_options)
def copy_to_embeddings_folder(args: dict):
    """Install the trained embedding and optionally clean up the output dir.

    Copies ``learned_embeds.bin`` from ``args["output_dir"]`` into
    ``<root>/embeddings/<trigger term>`` (the trigger term with any
    surrounding angle brackets removed), then asks on stdin whether the
    training output directory should be deleted. Pressing Enter counts as yes.
    """
    output_dir = args["output_dir"]
    embeds_file = Path(output_dir, "learned_embeds.bin")
    target_dir = Path(Globals.root, "embeddings", args["placeholder_token"].strip("<>"))

    os.makedirs(target_dir, exist_ok=True)
    print(f">> Training completed. Copying learned_embeds.bin into {str(target_dir)}")
    shutil.copy(embeds_file, target_dir)

    # An empty reply falls back to "y".
    reply = input("Delete training logs and intermediate checkpoints? [y] ") or "y"
    if not reply.startswith(("y", "Y")):
        print(f'>> Keeping {args["output_dir"]}')
        return
    shutil.rmtree(Path(output_dir))
def save_args(args: dict):
    """Write the given argument values to the preferences file.

    The file is ``<root>/TRAINING_DIR/CONF_FILE``; the directory is created
    if it does not exist yet.
    """
    prefs_dir = Path(Globals.root) / TRAINING_DIR
    os.makedirs(prefs_dir, exist_ok=True)
    OmegaConf.save(config=OmegaConf.create(args), f=prefs_dir / CONF_FILE)
def previous_args() -> dict:
    """Load the arguments saved by the previous run, if any.

    Returns:
        The configuration loaded from ``<root>/TRAINING_DIR/CONF_FILE`` with
        any angle brackets stripped from ``placeholder_token``, or None when
        the file is missing or cannot be read.
    """
    conf_file = Path(Globals.root) / TRAINING_DIR / CONF_FILE
    try:
        conf = OmegaConf.load(conf_file)
        # Only normalise the trigger term when one was actually saved, so a
        # missing/empty entry no longer throws away every other preference.
        token = conf.get("placeholder_token")
        if isinstance(token, str):
            conf["placeholder_token"] = token.strip("<>")
    except Exception:
        # Loading preferences is best-effort: any failure means "no saved
        # settings". Unlike a bare except, this lets KeyboardInterrupt and
        # SystemExit propagate.
        conf = None
    return conf
def do_front_end(args: Namespace):
    """Run the text-based front end and launch training with its settings.

    Args:
        args: parsed command-line arguments. They are not consulted here;
            the training parameters come from the form instead.

    Any exception raised while training or while copying the result is
    reported on stdout together with its traceback, and is not re-raised.
    """
    saved_args = previous_args()
    myapplication = MyApplication(saved_args=saved_args)
    myapplication.run()

    # Kept under its own name so the command-line ``args`` is not shadowed.
    ti_args = myapplication.ti_arguments
    if not ti_args:
        return

    os.makedirs(ti_args["output_dir"], exist_ok=True)

    # Automatically add angle brackets around the trigger
    if not re.match("^<.+>$", ti_args["placeholder_token"]):
        ti_args["placeholder_token"] = f"<{ti_args['placeholder_token']}>"

    ti_args["only_save_embeds"] = True
    save_args(ti_args)

    try:
        do_textual_inversion_training(**ti_args)
        copy_to_embeddings_folder(ti_args)
    except Exception as e:
        print("** An exception occurred during training. The exception was:")
        print(str(e))
        print("** DETAILS:")
        print(traceback.format_exc())
def main():
    """Entry point: parse the command line and train, via the form or directly.

    With ``--front_end``/``--gui`` the text-based form collects the
    parameters; otherwise the parsed arguments are passed straight to
    do_textual_inversion_training(). An AssertionError (raised when a
    required training argument is missing) is printed as a plain message
    instead of a traceback.
    """
    args = parse_args()
    global_set_root(args.root_dir or Globals.root)
    try:
        if args.front_end:
            do_front_end(args)
        else:
            do_textual_inversion_training(**vars(args))
    except AssertionError as e:
        print(str(e))


# The file is installed as an executable script with a shebang line, so make
# direct execution actually run the program (importing it stays side-effect free).
if __name__ == "__main__":
    main()

View File

@ -31,6 +31,10 @@ from diffusers.utils.import_utils import is_xformers_available
from huggingface_hub import HfFolder, Repository, whoami from huggingface_hub import HfFolder, Repository, whoami
# invokeai stuff # invokeai stuff
from ldm.invoke.args import (
PagingArgumentParser,
ArgFormatter
)
from ldm.invoke.globals import Globals, global_cache_dir from ldm.invoke.globals import Globals, global_cache_dir
from omegaconf import OmegaConf from omegaconf import OmegaConf
@ -74,145 +78,32 @@ def save_progress(text_encoder, placeholder_token_id, accelerator, placeholder_t
torch.save(learned_embeds_dict, save_path) torch.save(learned_embeds_dict, save_path)
def parse_args(): def parse_args():
parser = argparse.ArgumentParser(description="Simple example of a training script.") parser = PagingArgumentParser(
parser.add_argument( description="Textual inversion training",
"--save_steps", formatter_class=ArgFormatter
type=int,
default=500,
help="Save learned_embeds.bin every X updates steps.",
) )
parser.add_argument( general_group = parser.add_argument_group('General')
model_group = parser.add_argument_group('Models and Paths')
image_group = parser.add_argument_group('Training Image Location and Options')
trigger_group = parser.add_argument_group('Trigger Token')
training_group = parser.add_argument_group('Training Parameters')
checkpointing_group = parser.add_argument_group('Checkpointing and Resume')
integration_group = parser.add_argument_group('Integration')
general_group.add_argument(
'--front_end',
'--gui',
dest='front_end',
action="store_true",
default=False,
help="Activate the text-based graphical front end for collecting parameters. Other parameters will be ignored."
)
general_group.add_argument(
'--root_dir','--root', '--root_dir','--root',
type=Path, type=Path,
default=Globals.root, default=Globals.root,
help="Path to the invokeai runtime directory", help="Path to the invokeai runtime directory",
) )
parser.add_argument( general_group.add_argument(
"--only_save_embeds",
action="store_true",
default=False,
help="Save only the embeddings for the new concept.",
)
parser.add_argument(
"--model",
type=str,
default=None,
required=True,
help="Name of the diffusers model to train against, as defined in configs/models.yaml.",
)
parser.add_argument(
"--revision",
type=str,
default=None,
required=False,
help="Revision of pretrained model identifier from huggingface.co/models.",
)
parser.add_argument(
"--tokenizer_name",
type=str,
default=None,
help="Pretrained tokenizer name or path if not the same as model_name",
)
parser.add_argument(
"--train_data_dir",
type=Path,
default=None,
required=True,
help="A folder containing the training data."
)
parser.add_argument(
"--placeholder_token",
type=str,
default=None,
required=True,
help="A token to use as a placeholder for the concept.",
)
parser.add_argument(
"--initializer_token",
type=str,
default=None,
required=False,
help="A token to use as initializer word."
)
parser.add_argument("--learnable_property", type=str, default="object", help="Choose between 'object' and 'style'")
parser.add_argument("--repeats", type=int, default=100, help="How many times to repeat the training data.")
parser.add_argument(
"--output_dir",
type=Path,
default=f'{Globals.root}/text-inversion-model',
help="The output directory where the model predictions and checkpoints will be written.",
)
parser.add_argument("--seed", type=int, default=None, help="A seed for reproducible training.")
parser.add_argument(
"--resolution",
type=int,
default=512,
help=(
"The resolution for input images, all the images in the train/validation dataset will be resized to this"
" resolution"
),
)
parser.add_argument(
"--center_crop", action="store_true", help="Whether to center crop images before resizing to resolution"
)
parser.add_argument(
"--train_batch_size", type=int, default=16, help="Batch size (per device) for the training dataloader."
)
parser.add_argument("--num_train_epochs", type=int, default=100)
parser.add_argument(
"--max_train_steps",
type=int,
default=5000,
help="Total number of training steps to perform. If provided, overrides num_train_epochs.",
)
parser.add_argument(
"--gradient_accumulation_steps",
type=int,
default=1,
help="Number of updates steps to accumulate before performing a backward/update pass.",
)
parser.add_argument(
"--gradient_checkpointing",
action="store_true",
help="Whether or not to use gradient checkpointing to save memory at the expense of slower backward pass.",
)
parser.add_argument(
"--learning_rate",
type=float,
default=1e-4,
help="Initial learning rate (after the potential warmup period) to use.",
)
parser.add_argument(
"--scale_lr",
action="store_true",
default=True,
help="Scale the learning rate by the number of GPUs, gradient accumulation steps, and batch size.",
)
parser.add_argument(
"--lr_scheduler",
type=str,
default="constant",
help=(
'The scheduler type to use. Choose between ["linear", "cosine", "cosine_with_restarts", "polynomial",'
' "constant", "constant_with_warmup"]'
),
)
parser.add_argument(
"--lr_warmup_steps", type=int, default=500, help="Number of steps for the warmup in the lr scheduler."
)
parser.add_argument("--adam_beta1", type=float, default=0.9, help="The beta1 parameter for the Adam optimizer.")
parser.add_argument("--adam_beta2", type=float, default=0.999, help="The beta2 parameter for the Adam optimizer.")
parser.add_argument("--adam_weight_decay", type=float, default=1e-2, help="Weight decay to use.")
parser.add_argument("--adam_epsilon", type=float, default=1e-08, help="Epsilon value for the Adam optimizer")
parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.")
parser.add_argument("--hub_token", type=str, default=None, help="The token to use to push to the Model Hub.")
parser.add_argument(
"--hub_model_id",
type=str,
default=None,
help="The name of the repository to keep in sync with the local `output_dir`.",
)
parser.add_argument(
"--logging_dir", "--logging_dir",
type=Path, type=Path,
default="logs", default="logs",
@ -221,7 +112,147 @@ def parse_args():
" *output_dir/runs/**CURRENT_DATETIME_HOSTNAME***." " *output_dir/runs/**CURRENT_DATETIME_HOSTNAME***."
), ),
) )
parser.add_argument( general_group.add_argument(
"--output_dir",
type=Path,
default=f'{Globals.root}/text-inversion-model',
help="The output directory where the model predictions and checkpoints will be written.",
)
model_group.add_argument(
"--model",
type=str,
default='stable-diffusion-1.5',
help="Name of the diffusers model to train against, as defined in configs/models.yaml.",
)
model_group.add_argument(
"--revision",
type=str,
default=None,
required=False,
help="Revision of pretrained model identifier from huggingface.co/models.",
)
model_group.add_argument(
"--tokenizer_name",
type=str,
default=None,
help="Pretrained tokenizer name or path if not the same as model_name",
)
image_group.add_argument(
"--train_data_dir",
type=Path,
default=None,
help="A folder containing the training data."
)
image_group.add_argument(
"--resolution",
type=int,
default=512,
help=(
"The resolution for input images, all the images in the train/validation dataset will be resized to this"
" resolution"
),
)
image_group.add_argument(
"--center_crop", action="store_true", help="Whether to center crop images before resizing to resolution"
)
trigger_group.add_argument(
"--placeholder_token",
"--trigger_term",
dest='placeholder_token',
type=str,
default=None,
help="A token to use as a placeholder for the concept. This token will trigger the concept when included in the prompt as \"<trigger>\".",
)
trigger_group.add_argument(
"--learnable_property",
type=str,
choices=['object','style'],
default="object",
help="Choose between 'object' and 'style'"
)
trigger_group.add_argument(
"--initializer_token",
type=str,
default='*',
help="A symbol to use as the initializer word."
)
checkpointing_group.add_argument(
"--checkpointing_steps",
type=int,
default=500,
help=(
"Save a checkpoint of the training state every X updates. These checkpoints are only suitable for resuming"
" training using `--resume_from_checkpoint`."
),
)
checkpointing_group.add_argument(
"--resume_from_checkpoint",
type=Path,
default=None,
help=(
"Whether training should be resumed from a previous checkpoint. Use a path saved by"
' `--checkpointing_steps`, or `"latest"` to automatically select the last available checkpoint.'
),
)
checkpointing_group.add_argument(
"--save_steps",
type=int,
default=500,
help="Save learned_embeds.bin every X updates steps.",
)
training_group.add_argument("--repeats", type=int, default=100, help="How many times to repeat the training data.")
training_group.add_argument("--seed", type=int, default=None, help="A seed for reproducible training.")
training_group.add_argument(
"--train_batch_size", type=int, default=16, help="Batch size (per device) for the training dataloader."
)
training_group.add_argument("--num_train_epochs", type=int, default=100)
training_group.add_argument(
"--max_train_steps",
type=int,
default=5000,
help="Total number of training steps to perform. If provided, overrides num_train_epochs.",
)
training_group.add_argument(
"--gradient_accumulation_steps",
type=int,
default=1,
help="Number of updates steps to accumulate before performing a backward/update pass.",
)
training_group.add_argument(
"--gradient_checkpointing",
action="store_true",
help="Whether or not to use gradient checkpointing to save memory at the expense of slower backward pass.",
)
training_group.add_argument(
"--learning_rate",
type=float,
default=1e-4,
help="Initial learning rate (after the potential warmup period) to use.",
)
training_group.add_argument(
"--scale_lr",
action="store_true",
default=True,
help="Scale the learning rate by the number of GPUs, gradient accumulation steps, and batch size.",
)
training_group.add_argument(
"--lr_scheduler",
type=str,
default="constant",
help=(
'The scheduler type to use. Choose between ["linear", "cosine", "cosine_with_restarts", "polynomial",'
' "constant", "constant_with_warmup"]'
),
)
training_group.add_argument(
"--lr_warmup_steps", type=int, default=500, help="Number of steps for the warmup in the lr scheduler."
)
training_group.add_argument("--adam_beta1", type=float, default=0.9, help="The beta1 parameter for the Adam optimizer.")
training_group.add_argument("--adam_beta2", type=float, default=0.999, help="The beta2 parameter for the Adam optimizer.")
training_group.add_argument("--adam_weight_decay", type=float, default=1e-2, help="Weight decay to use.")
training_group.add_argument("--adam_epsilon", type=float, default=1e-08, help="Epsilon value for the Adam optimizer")
training_group.add_argument(
"--mixed_precision", "--mixed_precision",
type=str, type=str,
default="no", default="no",
@ -232,7 +263,7 @@ def parse_args():
"and an Nvidia Ampere GPU." "and an Nvidia Ampere GPU."
), ),
) )
parser.add_argument( training_group.add_argument(
"--allow_tf32", "--allow_tf32",
action="store_true", action="store_true",
help=( help=(
@ -240,7 +271,24 @@ def parse_args():
" https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices" " https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices"
), ),
) )
training_group.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank")
parser.add_argument( parser.add_argument(
"--enable_xformers_memory_efficient_attention", action="store_true", help="Whether or not to use xformers."
)
integration_group.add_argument(
"--only_save_embeds",
action="store_true",
default=False,
help="Save only the embeddings for the new concept.",
)
integration_group.add_argument(
"--hub_model_id",
type=str,
default=None,
help="The name of the repository to keep in sync with the local `output_dir`.",
)
integration_group.add_argument(
"--report_to", "--report_to",
type=str, type=str,
default="tensorboard", default="tensorboard",
@ -249,29 +297,8 @@ def parse_args():
' (default), `"wandb"` and `"comet_ml"`. Use `"all"` to report to all integrations.' ' (default), `"wandb"` and `"comet_ml"`. Use `"all"` to report to all integrations.'
), ),
) )
parser.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank") integration_group.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.")
parser.add_argument( integration_group.add_argument("--hub_token", type=str, default=None, help="The token to use to push to the Model Hub.")
"--checkpointing_steps",
type=int,
default=500,
help=(
"Save a checkpoint of the training state every X updates. These checkpoints are only suitable for resuming"
" training using `--resume_from_checkpoint`."
),
)
parser.add_argument(
"--resume_from_checkpoint",
type=Path,
default=None,
help=(
"Whether training should be resumed from a previous checkpoint. Use a path saved by"
' `--checkpointing_steps`, or `"latest"` to automatically select the last available checkpoint.'
),
)
parser.add_argument(
"--enable_xformers_memory_efficient_attention", action="store_true", help="Whether or not to use xformers."
)
args = parser.parse_args() args = parser.parse_args()
return args return args
@ -462,7 +489,11 @@ def do_textual_inversion_training(
enable_xformers_memory_efficient_attention:bool=False, enable_xformers_memory_efficient_attention:bool=False,
root_dir:Path=None, root_dir:Path=None,
hub_model_id:str=None, hub_model_id:str=None,
**kwargs,
): ):
assert model, 'Please specify a base model with --model'
assert train_data_dir, 'Please specify a directory containing the training images using --train_data_dir'
assert placeholder_token, 'Please specify a trigger term using --placeholder_token'
env_local_rank = int(os.environ.get("LOCAL_RANK", -1)) env_local_rank = int(os.environ.get("LOCAL_RANK", -1))
if env_local_rank != -1 and env_local_rank != local_rank: if env_local_rank != -1 and env_local_rank != local_rank:
local_rank = env_local_rank local_rank = env_local_rank

View File

@ -98,6 +98,8 @@ test = ["pytest>6.0.0", "pytest-cov"]
"load_models" = "scripts:configure_invokeai.main" "load_models" = "scripts:configure_invokeai.main"
"merge_embeddings" = "scripts:merge_embeddings.main" "merge_embeddings" = "scripts:merge_embeddings.main"
"preload_models" = "ldm.invoke.configure_invokeai:main" "preload_models" = "ldm.invoke.configure_invokeai:main"
"textual_inversion" = "ldm.invoke.textual_inversion:main"
"merge_models" = "ldm.invoke.merge_models:main"
[project.urls] [project.urls]
"Homepage" = "https://invoke-ai.github.io/InvokeAI/" "Homepage" = "https://invoke-ai.github.io/InvokeAI/"

View File

@ -15,7 +15,6 @@ from ldm.invoke.model_manager import ModelManager
parser = argparse.ArgumentParser(description="InvokeAI textual inversion training") parser = argparse.ArgumentParser(description="InvokeAI textual inversion training")
parser.add_argument( parser.add_argument(
"--root_dir", "--root_dir",
"--root-dir",
type=Path, type=Path,
default=Globals.root, default=Globals.root,
help="Path to the invokeai runtime directory", help="Path to the invokeai runtime directory",

View File

@ -1,350 +0,0 @@
#!/usr/bin/env python
import npyscreen
import os
import sys
import re
import shutil
import traceback
import curses
from ldm.invoke.globals import Globals, global_set_root
from omegaconf import OmegaConf
from pathlib import Path
from typing import List
import argparse
TRAINING_DATA = 'text-inversion-training-data'
TRAINING_DIR = 'text-inversion-output'
CONF_FILE = 'preferences.conf'
class textualInversionForm(npyscreen.FormMultiPageAction):
resolutions = [512, 768, 1024]
lr_schedulers = [
"linear", "cosine", "cosine_with_restarts",
"polynomial","constant", "constant_with_warmup"
]
precisions = ['no','fp16','bf16']
learnable_properties = ['object','style']
def __init__(self, parentApp, name, saved_args=None):
self.saved_args = saved_args or {}
super().__init__(parentApp, name)
def afterEditing(self):
self.parentApp.setNextForm(None)
def create(self):
self.model_names, default = self.get_model_names()
default_initializer_token = ''
default_placeholder_token = ''
saved_args = self.saved_args
try:
default = self.model_names.index(saved_args['model'])
except:
pass
self.add_widget_intelligent(
npyscreen.FixedText,
value='Use ctrl-N and ctrl-P to move to the <N>ext and <P>revious fields, cursor arrows to make a selection, and space to toggle checkboxes.'
)
self.model = self.add_widget_intelligent(
npyscreen.TitleSelectOne,
name='Model Name:',
values=self.model_names,
value=default,
max_height=len(self.model_names)+1
)
self.placeholder_token = self.add_widget_intelligent(
npyscreen.TitleText,
name='Trigger Term:',
value='', # saved_args.get('placeholder_token',''), # to restore previous term
)
self.placeholder_token.when_value_edited = self.initializer_changed
self.nextrely -= 1
self.nextrelx += 30
self.prompt_token = self.add_widget_intelligent(
npyscreen.FixedText,
name="Trigger term for use in prompt",
value='',
)
self.nextrelx -= 30
self.initializer_token = self.add_widget_intelligent(
npyscreen.TitleText,
name='Initializer:',
value=saved_args.get('initializer_token',default_initializer_token),
)
self.resume_from_checkpoint = self.add_widget_intelligent(
npyscreen.Checkbox,
name="Resume from last saved checkpoint",
value=False,
)
self.learnable_property = self.add_widget_intelligent(
npyscreen.TitleSelectOne,
name="Learnable property:",
values=self.learnable_properties,
value=self.learnable_properties.index(saved_args.get('learnable_property','object')),
max_height=4,
)
self.train_data_dir = self.add_widget_intelligent(
npyscreen.TitleFilename,
name='Data Training Directory:',
select_dir=True,
must_exist=False,
value=str(saved_args.get('train_data_dir',Path(Globals.root) / TRAINING_DATA / default_placeholder_token))
)
self.output_dir = self.add_widget_intelligent(
npyscreen.TitleFilename,
name='Output Destination Directory:',
select_dir=True,
must_exist=False,
value=str(saved_args.get('output_dir',Path(Globals.root) / TRAINING_DIR / default_placeholder_token))
)
self.resolution = self.add_widget_intelligent(
npyscreen.TitleSelectOne,
name='Image resolution (pixels):',
values = self.resolutions,
value=self.resolutions.index(saved_args.get('resolution',512)),
scroll_exit = True,
max_height=4,
)
self.center_crop = self.add_widget_intelligent(
npyscreen.Checkbox,
name="Center crop images before resizing to resolution",
value=saved_args.get('center_crop',False)
)
self.mixed_precision = self.add_widget_intelligent(
npyscreen.TitleSelectOne,
name='Mixed Precision:',
values=self.precisions,
value=self.precisions.index(saved_args.get('mixed_precision','fp16')),
max_height=4,
)
self.num_train_epochs = self.add_widget_intelligent(
npyscreen.TitleSlider,
name='Number of training epochs:',
out_of=1000,
step=50,
lowest=1,
value=saved_args.get('num_train_epochs',100)
)
self.max_train_steps = self.add_widget_intelligent(
npyscreen.TitleSlider,
name='Max Training Steps:',
out_of=10000,
step=500,
lowest=1,
value=saved_args.get('max_train_steps',3000)
)
self.train_batch_size = self.add_widget_intelligent(
npyscreen.TitleSlider,
name='Batch Size (reduce if you run out of memory):',
out_of=50,
step=1,
lowest=1,
value=saved_args.get('train_batch_size',8),
)
self.gradient_accumulation_steps = self.add_widget_intelligent(
npyscreen.TitleSlider,
name='Gradient Accumulation Steps (may need to decrease this to resume from a checkpoint):',
out_of=10,
step=1,
lowest=1,
value=saved_args.get('gradient_accumulation_steps',4)
)
self.lr_warmup_steps = self.add_widget_intelligent(
npyscreen.TitleSlider,
name='Warmup Steps:',
out_of=100,
step=1,
lowest=0,
value=saved_args.get('lr_warmup_steps',0),
)
self.learning_rate = self.add_widget_intelligent(
npyscreen.TitleText,
name="Learning Rate:",
value=str(saved_args.get('learning_rate','5.0e-04'),)
)
self.scale_lr = self.add_widget_intelligent(
npyscreen.Checkbox,
name="Scale learning rate by number GPUs, steps and batch size",
value=saved_args.get('scale_lr',True),
)
self.enable_xformers_memory_efficient_attention = self.add_widget_intelligent(
npyscreen.Checkbox,
name="Use xformers acceleration",
value=saved_args.get('enable_xformers_memory_efficient_attention',False),
)
self.lr_scheduler = self.add_widget_intelligent(
npyscreen.TitleSelectOne,
name='Learning rate scheduler:',
values = self.lr_schedulers,
max_height=7,
scroll_exit = True,
value=self.lr_schedulers.index(saved_args.get('lr_scheduler','constant')),
)
def initializer_changed(self):
    """Refresh the fields that are derived from the trigger term.

    Reads the current placeholder token and rewrites the prompt hint, the
    training-data directory and the output directory so they all refer to
    that token. The `resume_from_checkpoint` widget is then set to whether
    the resulting output directory already exists on disk.
    """
    token = self.placeholder_token.value
    self.prompt_token.value = f'(Trigger by using <{token}> in your prompts)'

    root = Path(Globals.root)
    self.train_data_dir.value = str(root / TRAINING_DATA / token)
    self.output_dir.value = str(root / TRAINING_DIR / token)

    # Read the path back from the widget so the check uses exactly the value
    # the form now holds.
    output_path = Path(self.output_dir.value)
    self.resume_from_checkpoint.value = output_path.exists()
def on_ok(self):
    """Handle the form's OK action.

    When validation fails the form is kept in editing mode so the user can
    correct the fields. Otherwise the application is told there is no next
    form, the marshalled arguments are stored on the parent application as
    `ti_arguments`, and a notification is shown while the training module
    is imported.
    """
    if not self.validate_field_values():
        self.editing = True
        return

    self.parentApp.setNextForm(None)
    self.editing = False
    self.parentApp.ti_arguments = self.marshall_arguments()
    npyscreen.notify('Launching textual inversion training. This will take a while...')
    # The module load takes a while, so we do it while the form and message are still up
    import ldm.invoke.textual_inversion_training
def ok_cancel(self):
    """Abandon the form and terminate the process with exit status 0."""
    # NOTE(review): npyscreen's ActionForm calls `on_cancel` for its Cancel
    # button -- confirm that something actually invokes `ok_cancel`.
    raise SystemExit(0)
def validate_field_values(self)->bool:
    """Check the form's fields before training is launched.

    Every problem found is collected and shown to the user in a single
    confirmation dialog.

    Checks performed:
      * a model is selected (neither None nor an empty selection);
      * the trigger term contains only letters, digits, dots and hyphens;
      * the training-data and output directories are not blank;
      * the learning rate can be converted to a float.

    Returns:
        True when every check passes, False after reporting the problems.
    """
    def is_blank(value) -> bool:
        # Text widgets hold '' (not None) when empty, so test the content.
        return value is None or not str(value).strip()

    bad_fields = []

    # marshall_arguments() reads element 0 of the selection, so an empty
    # selection has to be rejected here as well as None.
    selection = self.model.value
    if selection is None or selection == []:
        bad_fields.append('Model Name must correspond to a known model in models.yaml')

    # `or ''` keeps re.match from raising TypeError on a None value.
    if not re.match('^[a-zA-Z0-9.-]+$', self.placeholder_token.value or ''):
        bad_fields.append('Trigger term must only contain alphanumeric characters, the dot and hyphen')

    if is_blank(self.train_data_dir.value):
        bad_fields.append('Data Training Directory cannot be empty')
    if is_blank(self.output_dir.value):
        bad_fields.append('The Output Destination Directory cannot be empty')

    # marshall_arguments() converts this field with float(); catch a bad
    # value here instead of letting that conversion raise after validation.
    try:
        float(self.learning_rate.value)
    except (TypeError, ValueError):
        bad_fields.append('Learning Rate must be a number, for example 5.0e-04')

    if bad_fields:
        header = 'The following problems were detected and must be corrected:'
        npyscreen.notify_confirm('\n* '.join([header, *bad_fields]))
        return False
    return True
def get_model_names(self)->(List[str],int):
    """List the diffusers-format models and locate the default one.

    Loads `configs/models.yaml` from under `Globals.root` and keeps the
    entries whose `format` is 'diffusers', sorted by name.

    Returns:
        A `(model_names, default_index)` tuple. `default_index` is the
        position in `model_names` of the first entry that has a 'default'
        key, or 0 when no entry has one (previously this case raised
        IndexError).
    """
    conf = OmegaConf.load(Path(Globals.root) / 'configs' / 'models.yaml')
    model_names = sorted(
        name for name in conf.keys()
        if conf[name].get('format', None) == 'diffusers'
    )
    default_index = next(
        (idx for idx, name in enumerate(model_names) if 'default' in conf[name]),
        0,  # no model is flagged as default: fall back to the first one
    )
    return (model_names, default_index)
def marshall_arguments(self)->dict:
    """Collect the widget values into a dict of training arguments.

    Select-one widgets are translated from the selected index to the
    matching entry of their option list, string and boolean widgets are
    copied as-is, slider values are converted to int and the learning rate
    to float. 'resume_from_checkpoint' is set to 'latest' only when the
    resume widget is set and the output directory already exists.
    """
    # (argument name / widget attribute, attribute holding its option list)
    choice_fields = (
        ('model', 'model_names'),
        ('resolution', 'resolutions'),
        ('lr_scheduler', 'lr_schedulers'),
        ('mixed_precision', 'precisions'),
        ('learnable_property', 'learnable_properties'),
    )
    verbatim_fields = (
        'initializer_token', 'placeholder_token', 'train_data_dir',
        'output_dir', 'scale_lr', 'center_crop',
        'enable_xformers_memory_efficient_attention',
    )
    integer_fields = (
        'train_batch_size', 'gradient_accumulation_steps',
        'num_train_epochs', 'max_train_steps', 'lr_warmup_steps',
    )

    args = {
        name: getattr(self, options)[getattr(self, name).value[0]]
        for name, options in choice_fields
    }
    args.update((name, getattr(self, name).value) for name in verbatim_fields)
    args.update((name, int(getattr(self, name).value)) for name in integer_fields)
    args['learning_rate'] = float(self.learning_rate.value)

    # Resuming only makes sense when there is an existing output directory.
    wants_resume = self.resume_from_checkpoint.value
    if wants_resume and Path(self.output_dir.value).exists():
        args['resume_from_checkpoint'] = 'latest'
    return args
class MyApplication(npyscreen.NPSAppManaged):
    """npyscreen application hosting the textual inversion settings form."""

    def __init__(self, saved_args=None):
        """Create the application.

        `saved_args` is kept and later handed to the settings form.
        `ti_arguments` starts as None; the form's `on_ok` stores the
        marshalled training arguments there.
        """
        super().__init__()
        self.ti_arguments = None
        self.saved_args = saved_args

    def onStart(self):
        """Select the default theme and register the settings form as 'MAIN'."""
        npyscreen.setTheme(npyscreen.Themes.DefaultTheme)
        settings_form = self.addForm(
            'MAIN',
            textualInversionForm,
            name='Textual Inversion Settings',
            saved_args=self.saved_args,
        )
        self.main = settings_form
def copy_to_embeddings_folder(args:dict):
    '''
    Copy learned_embeds.bin from the training output directory into
    <root>/embeddings/<trigger term>, then ask whether the training
    output directory should be deleted (the default answer is yes).
    '''
    source = Path(args['output_dir']) / 'learned_embeds.bin'
    # The stored trigger term may be wrapped in angle brackets; drop them
    # for the folder name.
    dest_dir_name = args['placeholder_token'].strip('<>')
    destination = Path(Globals.root) / 'embeddings' / dest_dir_name
    destination.mkdir(parents=True, exist_ok=True)

    print(f'>> Training completed. Copying learned_embeds.bin into {str(destination)}')
    shutil.copy(source, destination)

    # An empty reply counts as 'y'.
    reply = input('Delete training logs and intermediate checkpoints? [y] ') or 'y'
    if reply[0] in ('y', 'Y'):
        shutil.rmtree(Path(args['output_dir']))
        return
    print(f'>> Keeping {args["output_dir"]}')
def save_args(args:dict):
    '''
    Write the given argument values to the omegaconf file CONF_FILE inside
    <root>/TRAINING_DIR, creating that directory when it is missing.
    '''
    training_root = Path(Globals.root) / TRAINING_DIR
    training_root.mkdir(parents=True, exist_ok=True)
    OmegaConf.save(
        config=OmegaConf.create(args),
        f=training_root / CONF_FILE,
    )
def previous_args()->dict:
    '''
    Get the previous arguments used.

    Loads CONF_FILE from <root>/TRAINING_DIR and strips the angle brackets
    from the stored placeholder token. Returns the loaded configuration,
    or None when it cannot be loaded or has no usable placeholder token.
    '''
    conf_file = Path(Globals.root) / TRAINING_DIR / CONF_FILE
    try:
        conf = OmegaConf.load(conf_file)
        conf['placeholder_token'] = conf['placeholder_token'].strip('<>')
    except Exception:
        # Best effort: a missing or unreadable file just means there are no
        # saved settings. `Exception` (rather than a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        return None
    return conf
if __name__ == '__main__':
    # Script entry point: parse the runtime root, show the settings form,
    # then run training with whatever arguments the form produced.
    parser = argparse.ArgumentParser(description='InvokeAI textual inversion training')
    parser.add_argument(
        '--root_dir','--root-dir',
        type=Path,
        default=Globals.root,
        help='Path to the invokeai runtime directory',
    )
    cli_args = parser.parse_args()
    global_set_root(cli_args.root_dir)

    saved_args = previous_args()
    myapplication = MyApplication(saved_args=saved_args)
    myapplication.run()

    from ldm.invoke.textual_inversion_training import do_textual_inversion_training

    # ti_arguments stays None unless the form's OK handler filled it in.
    # A separate name keeps the training dict distinct from the argparse
    # namespace above.
    if ti_args := myapplication.ti_arguments:
        os.makedirs(ti_args['output_dir'],exist_ok=True)

        # Automatically add angle brackets around the trigger
        if not re.match('^<.+>$',ti_args['placeholder_token']):
            ti_args['placeholder_token'] = f"<{ti_args['placeholder_token']}>"

        ti_args['only_save_embeds'] = True
        # Persist the settings before training so they survive a failed run.
        save_args(ti_args)

        try:
            do_textual_inversion_training(**ti_args)
            copy_to_embeddings_folder(ti_args)
        except Exception as e:
            print('** An exception occurred during training. The exception was:')
            print(str(e))
            print('** DETAILS:')
            print(traceback.format_exc())