diff --git a/docs/features/TEXTUAL_INVERSION.md b/docs/features/TEXTUAL_INVERSION.md index 7d54ea971c..801160401b 100644 --- a/docs/features/TEXTUAL_INVERSION.md +++ b/docs/features/TEXTUAL_INVERSION.md @@ -55,7 +55,7 @@ Please enter 1, 2, 3, or 4: [1] 3 From the command line, with the InvokeAI virtual environment active, you can launch the front end with the command -`textual_inversion_fe`. +`textual_inversion --gui`. This will launch a text-based front end that will look like this: @@ -219,11 +219,9 @@ term. For example `a plate of banana sushi in style`. ## **Training with the Command-Line Script** -InvokeAI also comes with a traditional command-line script for -launching textual inversion training. It is named -`textual_inversion`, and can be launched from within the -"developer's console", or from the command line after activating -InvokeAI's virtual environment. +Training can also be done using a traditional command-line script. It +can be launched from within the "developer's console", or from the +command line after activating InvokeAI's virtual environment. 
It accepts a large number of arguments, which can be summarized by passing the `--help` argument: diff --git a/ldm/invoke/textual_inversion.py b/ldm/invoke/textual_inversion.py new file mode 100755 index 0000000000..939287ddea --- /dev/null +++ b/ldm/invoke/textual_inversion.py @@ -0,0 +1,414 @@ +#!/usr/bin/env python + +import argparse +import curses +import os +import re +import shutil +import sys +import traceback +from argparse import Namespace +from pathlib import Path +from typing import List + +import npyscreen +from omegaconf import OmegaConf + +from ldm.invoke.globals import Globals, global_set_root +from ldm.invoke.textual_inversion_training import ( + do_textual_inversion_training, + parse_args, +) + +TRAINING_DATA = "text-inversion-training-data" +TRAINING_DIR = "text-inversion-output" +CONF_FILE = "preferences.conf" + + +class textualInversionForm(npyscreen.FormMultiPageAction): + resolutions = [512, 768, 1024] + lr_schedulers = [ + "linear", + "cosine", + "cosine_with_restarts", + "polynomial", + "constant", + "constant_with_warmup", + ] + precisions = ["no", "fp16", "bf16"] + learnable_properties = ["object", "style"] + + def __init__(self, parentApp, name, saved_args=None): + self.saved_args = saved_args or {} + super().__init__(parentApp, name) + + def afterEditing(self): + self.parentApp.setNextForm(None) + + def create(self): + self.model_names, default = self.get_model_names() + default_initializer_token = "★" + default_placeholder_token = "" + saved_args = self.saved_args + + try: + default = self.model_names.index(saved_args["model"]) + except: + pass + + self.add_widget_intelligent( + npyscreen.FixedText, + value="Use ctrl-N and ctrl-P to move to the ext and

revious fields, cursor arrows to make a selection, and space to toggle checkboxes.", + ) + + self.model = self.add_widget_intelligent( + npyscreen.TitleSelectOne, + name="Model Name:", + values=self.model_names, + value=default, + max_height=len(self.model_names) + 1, + ) + self.placeholder_token = self.add_widget_intelligent( + npyscreen.TitleText, + name="Trigger Term:", + value="", # saved_args.get('placeholder_token',''), # to restore previous term + ) + self.placeholder_token.when_value_edited = self.initializer_changed + self.nextrely -= 1 + self.nextrelx += 30 + self.prompt_token = self.add_widget_intelligent( + npyscreen.FixedText, + name="Trigger term for use in prompt", + value="", + ) + self.nextrelx -= 30 + self.initializer_token = self.add_widget_intelligent( + npyscreen.TitleText, + name="Initializer:", + value=saved_args.get("initializer_token", default_initializer_token), + ) + self.resume_from_checkpoint = self.add_widget_intelligent( + npyscreen.Checkbox, + name="Resume from last saved checkpoint", + value=False, + ) + self.learnable_property = self.add_widget_intelligent( + npyscreen.TitleSelectOne, + name="Learnable property:", + values=self.learnable_properties, + value=self.learnable_properties.index( + saved_args.get("learnable_property", "object") + ), + max_height=4, + ) + self.train_data_dir = self.add_widget_intelligent( + npyscreen.TitleFilename, + name="Data Training Directory:", + select_dir=True, + must_exist=False, + value=str( + saved_args.get( + "train_data_dir", + Path(Globals.root) / TRAINING_DATA / default_placeholder_token, + ) + ), + ) + self.output_dir = self.add_widget_intelligent( + npyscreen.TitleFilename, + name="Output Destination Directory:", + select_dir=True, + must_exist=False, + value=str( + saved_args.get( + "output_dir", + Path(Globals.root) / TRAINING_DIR / default_placeholder_token, + ) + ), + ) + self.resolution = self.add_widget_intelligent( + npyscreen.TitleSelectOne, + name="Image resolution (pixels):", + 
values=self.resolutions, + value=self.resolutions.index(saved_args.get("resolution", 512)), + scroll_exit=True, + max_height=4, + ) + self.center_crop = self.add_widget_intelligent( + npyscreen.Checkbox, + name="Center crop images before resizing to resolution", + value=saved_args.get("center_crop", False), + ) + self.mixed_precision = self.add_widget_intelligent( + npyscreen.TitleSelectOne, + name="Mixed Precision:", + values=self.precisions, + value=self.precisions.index(saved_args.get("mixed_precision", "fp16")), + max_height=4, + ) + self.num_train_epochs = self.add_widget_intelligent( + npyscreen.TitleSlider, + name="Number of training epochs:", + out_of=1000, + step=50, + lowest=1, + value=saved_args.get("num_train_epochs", 100), + ) + self.max_train_steps = self.add_widget_intelligent( + npyscreen.TitleSlider, + name="Max Training Steps:", + out_of=10000, + step=500, + lowest=1, + value=saved_args.get("max_train_steps", 3000), + ) + self.train_batch_size = self.add_widget_intelligent( + npyscreen.TitleSlider, + name="Batch Size (reduce if you run out of memory):", + out_of=50, + step=1, + lowest=1, + value=saved_args.get("train_batch_size", 8), + ) + self.gradient_accumulation_steps = self.add_widget_intelligent( + npyscreen.TitleSlider, + name="Gradient Accumulation Steps (may need to decrease this to resume from a checkpoint):", + out_of=10, + step=1, + lowest=1, + value=saved_args.get("gradient_accumulation_steps", 4), + ) + self.lr_warmup_steps = self.add_widget_intelligent( + npyscreen.TitleSlider, + name="Warmup Steps:", + out_of=100, + step=1, + lowest=0, + value=saved_args.get("lr_warmup_steps", 0), + ) + self.learning_rate = self.add_widget_intelligent( + npyscreen.TitleText, + name="Learning Rate:", + value=str( + saved_args.get("learning_rate", "5.0e-04"), + ), + ) + self.scale_lr = self.add_widget_intelligent( + npyscreen.Checkbox, + name="Scale learning rate by number GPUs, steps and batch size", + value=saved_args.get("scale_lr", True), + ) + 
self.enable_xformers_memory_efficient_attention = self.add_widget_intelligent( + npyscreen.Checkbox, + name="Use xformers acceleration", + value=saved_args.get("enable_xformers_memory_efficient_attention", False), + ) + self.lr_scheduler = self.add_widget_intelligent( + npyscreen.TitleSelectOne, + name="Learning rate scheduler:", + values=self.lr_schedulers, + max_height=7, + scroll_exit=True, + value=self.lr_schedulers.index(saved_args.get("lr_scheduler", "constant")), + ) + + def initializer_changed(self): + placeholder = self.placeholder_token.value + self.prompt_token.value = f"(Trigger by using <{placeholder}> in your prompts)" + self.train_data_dir.value = str( + Path(Globals.root) / TRAINING_DATA / placeholder + ) + self.output_dir.value = str(Path(Globals.root) / TRAINING_DIR / placeholder) + self.resume_from_checkpoint.value = Path(self.output_dir.value).exists() + + def on_ok(self): + if self.validate_field_values(): + self.parentApp.setNextForm(None) + self.editing = False + self.parentApp.ti_arguments = self.marshall_arguments() + npyscreen.notify( + "Launching textual inversion training. This will take a while..." 
+ ) + else: + self.editing = True + + def ok_cancel(self): + sys.exit(0) + + def validate_field_values(self) -> bool: + bad_fields = [] + if self.model.value is None: + bad_fields.append( + "Model Name must correspond to a known model in models.yaml" + ) + if not re.match("^[a-zA-Z0-9.-]+$", self.placeholder_token.value): + bad_fields.append( + "Trigger term must only contain alphanumeric characters, the dot and hyphen" + ) + if self.train_data_dir.value is None: + bad_fields.append("Data Training Directory cannot be empty") + if self.output_dir.value is None: + bad_fields.append("The Output Destination Directory cannot be empty") + if len(bad_fields) > 0: + message = "The following problems were detected and must be corrected:" + for problem in bad_fields: + message += f"\n* {problem}" + npyscreen.notify_confirm(message) + return False + else: + return True + + def get_model_names(self) -> (List[str], int): + conf = OmegaConf.load(os.path.join(Globals.root, "configs/models.yaml")) + model_names = [ + idx + for idx in sorted(list(conf.keys())) + if conf[idx].get("format", None) == "diffusers" + ] + defaults = [ + idx + for idx in range(len(model_names)) + if "default" in conf[model_names[idx]] + ] + return (model_names, defaults[0]) + + def marshall_arguments(self) -> dict: + args = dict() + + # the choices + args.update( + model=self.model_names[self.model.value[0]], + resolution=self.resolutions[self.resolution.value[0]], + lr_scheduler=self.lr_schedulers[self.lr_scheduler.value[0]], + mixed_precision=self.precisions[self.mixed_precision.value[0]], + learnable_property=self.learnable_properties[ + self.learnable_property.value[0] + ], + ) + + # all the strings and booleans + for attr in ( + "initializer_token", + "placeholder_token", + "train_data_dir", + "output_dir", + "scale_lr", + "center_crop", + "enable_xformers_memory_efficient_attention", + ): + args[attr] = getattr(self, attr).value + + # all the integers + for attr in ( + "train_batch_size", + 
"gradient_accumulation_steps", + "num_train_epochs", + "max_train_steps", + "lr_warmup_steps", + ): + args[attr] = int(getattr(self, attr).value) + + # the floats (just one) + args.update(learning_rate=float(self.learning_rate.value)) + + # a special case + if self.resume_from_checkpoint.value and Path(self.output_dir.value).exists(): + args["resume_from_checkpoint"] = "latest" + + return args + + +class MyApplication(npyscreen.NPSAppManaged): + def __init__(self, saved_args=None): + super().__init__() + self.ti_arguments = None + self.saved_args = saved_args + + def onStart(self): + npyscreen.setTheme(npyscreen.Themes.DefaultTheme) + self.main = self.addForm( + "MAIN", + textualInversionForm, + name="Textual Inversion Settings", + saved_args=self.saved_args, + ) + + +def copy_to_embeddings_folder(args: dict): + """ + Copy learned_embeds.bin into the embeddings folder, and offer to + delete the full model and checkpoints. + """ + source = Path(args["output_dir"], "learned_embeds.bin") + dest_dir_name = args["placeholder_token"].strip("<>") + destination = Path(Globals.root, "embeddings", dest_dir_name) + os.makedirs(destination, exist_ok=True) + print(f">> Training completed. Copying learned_embeds.bin into {str(destination)}") + shutil.copy(source, destination) + if ( + input("Delete training logs and intermediate checkpoints? [y] ") or "y" + ).startswith(("y", "Y")): + shutil.rmtree(Path(args["output_dir"])) + else: + print(f'>> Keeping {args["output_dir"]}') + + +def save_args(args: dict): + """ + Save the current argument values to an omegaconf file + """ + dest_dir = Path(Globals.root) / TRAINING_DIR + os.makedirs(dest_dir, exist_ok=True) + conf_file = dest_dir / CONF_FILE + conf = OmegaConf.create(args) + OmegaConf.save(config=conf, f=conf_file) + + +def previous_args() -> dict: + """ + Get the previous arguments used. 
+ """ + conf_file = Path(Globals.root) / TRAINING_DIR / CONF_FILE + try: + conf = OmegaConf.load(conf_file) + conf["placeholder_token"] = conf["placeholder_token"].strip("<>") + except: + conf = None + + return conf + + +def do_front_end(args: Namespace): + saved_args = previous_args() + myapplication = MyApplication(saved_args=saved_args) + myapplication.run() + + if args := myapplication.ti_arguments: + os.makedirs(args["output_dir"], exist_ok=True) + + # Automatically add angle brackets around the trigger + if not re.match("^<.+>$", args["placeholder_token"]): + args["placeholder_token"] = f"<{args['placeholder_token']}>" + + args["only_save_embeds"] = True + save_args(args) + + try: + print(f"DEBUG: args = {args}") + do_textual_inversion_training(**args) + copy_to_embeddings_folder(args) + except Exception as e: + print("** An exception occurred during training. The exception was:") + print(str(e)) + print("** DETAILS:") + print(traceback.format_exc()) + +def main(): + args = parse_args() + global_set_root(args.root_dir or Globals.root) + try: + if args.front_end: + do_front_end(args) + else: + do_textual_inversion_training(**vars(args)) + except AssertionError as e: + print(str(e)) diff --git a/ldm/invoke/textual_inversion_training.py b/ldm/invoke/textual_inversion_training.py index 7003a149fb..9965bcaf4d 100644 --- a/ldm/invoke/textual_inversion_training.py +++ b/ldm/invoke/textual_inversion_training.py @@ -31,6 +31,10 @@ from diffusers.utils.import_utils import is_xformers_available from huggingface_hub import HfFolder, Repository, whoami # invokeai stuff +from ldm.invoke.args import ( + PagingArgumentParser, + ArgFormatter +) from ldm.invoke.globals import Globals, global_cache_dir from omegaconf import OmegaConf @@ -74,145 +78,32 @@ def save_progress(text_encoder, placeholder_token_id, accelerator, placeholder_t torch.save(learned_embeds_dict, save_path) def parse_args(): - parser = argparse.ArgumentParser(description="Simple example of a training 
script.") - parser.add_argument( - "--save_steps", - type=int, - default=500, - help="Save learned_embeds.bin every X updates steps.", + parser = PagingArgumentParser( + description="Textual inversion training", + formatter_class=ArgFormatter ) - parser.add_argument( + general_group = parser.add_argument_group('General') + model_group = parser.add_argument_group('Models and Paths') + image_group = parser.add_argument_group('Training Image Location and Options') + trigger_group = parser.add_argument_group('Trigger Token') + training_group = parser.add_argument_group('Training Parameters') + checkpointing_group = parser.add_argument_group('Checkpointing and Resume') + integration_group = parser.add_argument_group('Integration') + general_group.add_argument( + '--front_end', + '--gui', + dest='front_end', + action="store_true", + default=False, + help="Activate the text-based graphical front end for collecting parameters. Other parameters will be ignored." + ) + general_group.add_argument( '--root_dir','--root', type=Path, default=Globals.root, help="Path to the invokeai runtime directory", ) - parser.add_argument( - "--only_save_embeds", - action="store_true", - default=False, - help="Save only the embeddings for the new concept.", - ) - parser.add_argument( - "--model", - type=str, - default=None, - required=True, - help="Name of the diffusers model to train against, as defined in configs/models.yaml.", - ) - parser.add_argument( - "--revision", - type=str, - default=None, - required=False, - help="Revision of pretrained model identifier from huggingface.co/models.", - ) - parser.add_argument( - "--tokenizer_name", - type=str, - default=None, - help="Pretrained tokenizer name or path if not the same as model_name", - ) - parser.add_argument( - "--train_data_dir", - type=Path, - default=None, - required=True, - help="A folder containing the training data." 
- ) - parser.add_argument( - "--placeholder_token", - type=str, - default=None, - required=True, - help="A token to use as a placeholder for the concept.", - ) - parser.add_argument( - "--initializer_token", - type=str, - default=None, - required=False, - help="A token to use as initializer word." - ) - parser.add_argument("--learnable_property", type=str, default="object", help="Choose between 'object' and 'style'") - parser.add_argument("--repeats", type=int, default=100, help="How many times to repeat the training data.") - parser.add_argument( - "--output_dir", - type=Path, - default=f'{Globals.root}/text-inversion-model', - help="The output directory where the model predictions and checkpoints will be written.", - ) - parser.add_argument("--seed", type=int, default=None, help="A seed for reproducible training.") - parser.add_argument( - "--resolution", - type=int, - default=512, - help=( - "The resolution for input images, all the images in the train/validation dataset will be resized to this" - " resolution" - ), - ) - parser.add_argument( - "--center_crop", action="store_true", help="Whether to center crop images before resizing to resolution" - ) - parser.add_argument( - "--train_batch_size", type=int, default=16, help="Batch size (per device) for the training dataloader." - ) - parser.add_argument("--num_train_epochs", type=int, default=100) - parser.add_argument( - "--max_train_steps", - type=int, - default=5000, - help="Total number of training steps to perform. 
If provided, overrides num_train_epochs.", - ) - parser.add_argument( - "--gradient_accumulation_steps", - type=int, - default=1, - help="Number of updates steps to accumulate before performing a backward/update pass.", - ) - parser.add_argument( - "--gradient_checkpointing", - action="store_true", - help="Whether or not to use gradient checkpointing to save memory at the expense of slower backward pass.", - ) - parser.add_argument( - "--learning_rate", - type=float, - default=1e-4, - help="Initial learning rate (after the potential warmup period) to use.", - ) - parser.add_argument( - "--scale_lr", - action="store_true", - default=True, - help="Scale the learning rate by the number of GPUs, gradient accumulation steps, and batch size.", - ) - parser.add_argument( - "--lr_scheduler", - type=str, - default="constant", - help=( - 'The scheduler type to use. Choose between ["linear", "cosine", "cosine_with_restarts", "polynomial",' - ' "constant", "constant_with_warmup"]' - ), - ) - parser.add_argument( - "--lr_warmup_steps", type=int, default=500, help="Number of steps for the warmup in the lr scheduler." 
- ) - parser.add_argument("--adam_beta1", type=float, default=0.9, help="The beta1 parameter for the Adam optimizer.") - parser.add_argument("--adam_beta2", type=float, default=0.999, help="The beta2 parameter for the Adam optimizer.") - parser.add_argument("--adam_weight_decay", type=float, default=1e-2, help="Weight decay to use.") - parser.add_argument("--adam_epsilon", type=float, default=1e-08, help="Epsilon value for the Adam optimizer") - parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.") - parser.add_argument("--hub_token", type=str, default=None, help="The token to use to push to the Model Hub.") - parser.add_argument( - "--hub_model_id", - type=str, - default=None, - help="The name of the repository to keep in sync with the local `output_dir`.", - ) - parser.add_argument( + general_group.add_argument( "--logging_dir", type=Path, default="logs", @@ -221,7 +112,147 @@ def parse_args(): " *output_dir/runs/**CURRENT_DATETIME_HOSTNAME***." ), ) - parser.add_argument( + general_group.add_argument( + "--output_dir", + type=Path, + default=f'{Globals.root}/text-inversion-model', + help="The output directory where the model predictions and checkpoints will be written.", + ) + model_group.add_argument( + "--model", + type=str, + default='stable-diffusion-1.5', + help="Name of the diffusers model to train against, as defined in configs/models.yaml.", + ) + model_group.add_argument( + "--revision", + type=str, + default=None, + required=False, + help="Revision of pretrained model identifier from huggingface.co/models.", + ) + + model_group.add_argument( + "--tokenizer_name", + type=str, + default=None, + help="Pretrained tokenizer name or path if not the same as model_name", + ) + image_group.add_argument( + "--train_data_dir", + type=Path, + default=None, + help="A folder containing the training data." 
+ ) + image_group.add_argument( + "--resolution", + type=int, + default=512, + help=( + "The resolution for input images, all the images in the train/validation dataset will be resized to this" + " resolution" + ), + ) + image_group.add_argument( + "--center_crop", action="store_true", help="Whether to center crop images before resizing to resolution" + ) + trigger_group.add_argument( + "--placeholder_token", + "--trigger_term", + dest='placeholder_token', + type=str, + default=None, + help="A token to use as a placeholder for the concept. This token will trigger the concept when included in the prompt as \"\".", + ) + trigger_group.add_argument( + "--learnable_property", + type=str, + choices=['object','style'], + default="object", + help="Choose between 'object' and 'style'" + ) + trigger_group.add_argument( + "--initializer_token", + type=str, + default='*', + help="A symbol to use as the initializer word." + ) + checkpointing_group.add_argument( + "--checkpointing_steps", + type=int, + default=500, + help=( + "Save a checkpoint of the training state every X updates. These checkpoints are only suitable for resuming" + " training using `--resume_from_checkpoint`." + ), + ) + checkpointing_group.add_argument( + "--resume_from_checkpoint", + type=Path, + default=None, + help=( + "Whether training should be resumed from a previous checkpoint. Use a path saved by" + ' `--checkpointing_steps`, or `"latest"` to automatically select the last available checkpoint.' + ), + ) + checkpointing_group.add_argument( + "--save_steps", + type=int, + default=500, + help="Save learned_embeds.bin every X updates steps.", + ) + training_group.add_argument("--repeats", type=int, default=100, help="How many times to repeat the training data.") + training_group.add_argument("--seed", type=int, default=None, help="A seed for reproducible training.") + training_group.add_argument( + "--train_batch_size", type=int, default=16, help="Batch size (per device) for the training dataloader." 
+ ) + training_group.add_argument("--num_train_epochs", type=int, default=100) + training_group.add_argument( + "--max_train_steps", + type=int, + default=5000, + help="Total number of training steps to perform. If provided, overrides num_train_epochs.", + ) + training_group.add_argument( + "--gradient_accumulation_steps", + type=int, + default=1, + help="Number of updates steps to accumulate before performing a backward/update pass.", + ) + training_group.add_argument( + "--gradient_checkpointing", + action="store_true", + help="Whether or not to use gradient checkpointing to save memory at the expense of slower backward pass.", + ) + training_group.add_argument( + "--learning_rate", + type=float, + default=1e-4, + help="Initial learning rate (after the potential warmup period) to use.", + ) + training_group.add_argument( + "--scale_lr", + action="store_true", + default=True, + help="Scale the learning rate by the number of GPUs, gradient accumulation steps, and batch size.", + ) + training_group.add_argument( + "--lr_scheduler", + type=str, + default="constant", + help=( + 'The scheduler type to use. Choose between ["linear", "cosine", "cosine_with_restarts", "polynomial",' + ' "constant", "constant_with_warmup"]' + ), + ) + training_group.add_argument( + "--lr_warmup_steps", type=int, default=500, help="Number of steps for the warmup in the lr scheduler." 
+ ) + training_group.add_argument("--adam_beta1", type=float, default=0.9, help="The beta1 parameter for the Adam optimizer.") + training_group.add_argument("--adam_beta2", type=float, default=0.999, help="The beta2 parameter for the Adam optimizer.") + training_group.add_argument("--adam_weight_decay", type=float, default=1e-2, help="Weight decay to use.") + training_group.add_argument("--adam_epsilon", type=float, default=1e-08, help="Epsilon value for the Adam optimizer") + training_group.add_argument( "--mixed_precision", type=str, default="no", @@ -232,7 +263,7 @@ def parse_args(): "and an Nvidia Ampere GPU." ), ) - parser.add_argument( + training_group.add_argument( "--allow_tf32", action="store_true", help=( @@ -240,7 +271,24 @@ def parse_args(): " https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices" ), ) + training_group.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank") parser.add_argument( + "--enable_xformers_memory_efficient_attention", action="store_true", help="Whether or not to use xformers." + ) + + integration_group.add_argument( + "--only_save_embeds", + action="store_true", + default=False, + help="Save only the embeddings for the new concept.", + ) + integration_group.add_argument( + "--hub_model_id", + type=str, + default=None, + help="The name of the repository to keep in sync with the local `output_dir`.", + ) + integration_group.add_argument( "--report_to", type=str, default="tensorboard", @@ -249,29 +297,8 @@ def parse_args(): ' (default), `"wandb"` and `"comet_ml"`. Use `"all"` to report to all integrations.' ), ) - parser.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank") - parser.add_argument( - "--checkpointing_steps", - type=int, - default=500, - help=( - "Save a checkpoint of the training state every X updates. These checkpoints are only suitable for resuming" - " training using `--resume_from_checkpoint`." 
- ), - ) - parser.add_argument( - "--resume_from_checkpoint", - type=Path, - default=None, - help=( - "Whether training should be resumed from a previous checkpoint. Use a path saved by" - ' `--checkpointing_steps`, or `"latest"` to automatically select the last available checkpoint.' - ), - ) - parser.add_argument( - "--enable_xformers_memory_efficient_attention", action="store_true", help="Whether or not to use xformers." - ) - + integration_group.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.") + integration_group.add_argument("--hub_token", type=str, default=None, help="The token to use to push to the Model Hub.") args = parser.parse_args() return args @@ -462,7 +489,11 @@ def do_textual_inversion_training( enable_xformers_memory_efficient_attention:bool=False, root_dir:Path=None, hub_model_id:str=None, + **kwargs, ): + assert model, 'Please specify a base model with --model' + assert train_data_dir, 'Please specify a directory containing the training images using --train_data_dir' + assert placeholder_token, 'Please specify a trigger term using --placeholder_token' env_local_rank = int(os.environ.get("LOCAL_RANK", -1)) if env_local_rank != -1 and env_local_rank != local_rank: local_rank = env_local_rank diff --git a/pyproject.toml b/pyproject.toml index 8359f14e6d..7ce879a35e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -98,6 +98,8 @@ test = ["pytest>6.0.0", "pytest-cov"] "load_models" = "scripts:configure_invokeai.main" "merge_embeddings" = "scripts:merge_embeddings.main" "preload_models" = "ldm.invoke.configure_invokeai:main" +"textual_inversion" = "ldm.invoke.textual_inversion:main" +"merge_models" = "ldm.invoke.merge_models:main" [project.urls] "Homepage" = "https://invoke-ai.github.io/InvokeAI/" diff --git a/scripts/merge_models.py b/scripts/merge_models.py index 1d8ac10018..4fe6d93854 100755 --- a/scripts/merge_models.py +++ b/scripts/merge_models.py @@ -15,7 +15,6 @@ from 
ldm.invoke.model_manager import ModelManager parser = argparse.ArgumentParser(description="InvokeAI textual inversion training") parser.add_argument( "--root_dir", - "--root-dir", type=Path, default=Globals.root, help="Path to the invokeai runtime directory", diff --git a/scripts/textual_inversion_fe.py b/scripts/textual_inversion_fe.py deleted file mode 100755 index 0639d9c2c8..0000000000 --- a/scripts/textual_inversion_fe.py +++ /dev/null @@ -1,350 +0,0 @@ -#!/usr/bin/env python - -import npyscreen -import os -import sys -import re -import shutil -import traceback -import curses -from ldm.invoke.globals import Globals, global_set_root -from omegaconf import OmegaConf -from pathlib import Path -from typing import List -import argparse - -TRAINING_DATA = 'text-inversion-training-data' -TRAINING_DIR = 'text-inversion-output' -CONF_FILE = 'preferences.conf' - -class textualInversionForm(npyscreen.FormMultiPageAction): - resolutions = [512, 768, 1024] - lr_schedulers = [ - "linear", "cosine", "cosine_with_restarts", - "polynomial","constant", "constant_with_warmup" - ] - precisions = ['no','fp16','bf16'] - learnable_properties = ['object','style'] - - def __init__(self, parentApp, name, saved_args=None): - self.saved_args = saved_args or {} - super().__init__(parentApp, name) - - def afterEditing(self): - self.parentApp.setNextForm(None) - - def create(self): - self.model_names, default = self.get_model_names() - default_initializer_token = '★' - default_placeholder_token = '' - saved_args = self.saved_args - - try: - default = self.model_names.index(saved_args['model']) - except: - pass - - self.add_widget_intelligent( - npyscreen.FixedText, - value='Use ctrl-N and ctrl-P to move to the ext and

revious fields, cursor arrows to make a selection, and space to toggle checkboxes.' - ) - - self.model = self.add_widget_intelligent( - npyscreen.TitleSelectOne, - name='Model Name:', - values=self.model_names, - value=default, - max_height=len(self.model_names)+1 - ) - self.placeholder_token = self.add_widget_intelligent( - npyscreen.TitleText, - name='Trigger Term:', - value='', # saved_args.get('placeholder_token',''), # to restore previous term - ) - self.placeholder_token.when_value_edited = self.initializer_changed - self.nextrely -= 1 - self.nextrelx += 30 - self.prompt_token = self.add_widget_intelligent( - npyscreen.FixedText, - name="Trigger term for use in prompt", - value='', - ) - self.nextrelx -= 30 - self.initializer_token = self.add_widget_intelligent( - npyscreen.TitleText, - name='Initializer:', - value=saved_args.get('initializer_token',default_initializer_token), - ) - self.resume_from_checkpoint = self.add_widget_intelligent( - npyscreen.Checkbox, - name="Resume from last saved checkpoint", - value=False, - ) - self.learnable_property = self.add_widget_intelligent( - npyscreen.TitleSelectOne, - name="Learnable property:", - values=self.learnable_properties, - value=self.learnable_properties.index(saved_args.get('learnable_property','object')), - max_height=4, - ) - self.train_data_dir = self.add_widget_intelligent( - npyscreen.TitleFilename, - name='Data Training Directory:', - select_dir=True, - must_exist=False, - value=str(saved_args.get('train_data_dir',Path(Globals.root) / TRAINING_DATA / default_placeholder_token)) - ) - self.output_dir = self.add_widget_intelligent( - npyscreen.TitleFilename, - name='Output Destination Directory:', - select_dir=True, - must_exist=False, - value=str(saved_args.get('output_dir',Path(Globals.root) / TRAINING_DIR / default_placeholder_token)) - ) - self.resolution = self.add_widget_intelligent( - npyscreen.TitleSelectOne, - name='Image resolution (pixels):', - values = self.resolutions, - 
value=self.resolutions.index(saved_args.get('resolution',512)), - scroll_exit = True, - max_height=4, - ) - self.center_crop = self.add_widget_intelligent( - npyscreen.Checkbox, - name="Center crop images before resizing to resolution", - value=saved_args.get('center_crop',False) - ) - self.mixed_precision = self.add_widget_intelligent( - npyscreen.TitleSelectOne, - name='Mixed Precision:', - values=self.precisions, - value=self.precisions.index(saved_args.get('mixed_precision','fp16')), - max_height=4, - ) - self.num_train_epochs = self.add_widget_intelligent( - npyscreen.TitleSlider, - name='Number of training epochs:', - out_of=1000, - step=50, - lowest=1, - value=saved_args.get('num_train_epochs',100) - ) - self.max_train_steps = self.add_widget_intelligent( - npyscreen.TitleSlider, - name='Max Training Steps:', - out_of=10000, - step=500, - lowest=1, - value=saved_args.get('max_train_steps',3000) - ) - self.train_batch_size = self.add_widget_intelligent( - npyscreen.TitleSlider, - name='Batch Size (reduce if you run out of memory):', - out_of=50, - step=1, - lowest=1, - value=saved_args.get('train_batch_size',8), - ) - self.gradient_accumulation_steps = self.add_widget_intelligent( - npyscreen.TitleSlider, - name='Gradient Accumulation Steps (may need to decrease this to resume from a checkpoint):', - out_of=10, - step=1, - lowest=1, - value=saved_args.get('gradient_accumulation_steps',4) - ) - self.lr_warmup_steps = self.add_widget_intelligent( - npyscreen.TitleSlider, - name='Warmup Steps:', - out_of=100, - step=1, - lowest=0, - value=saved_args.get('lr_warmup_steps',0), - ) - self.learning_rate = self.add_widget_intelligent( - npyscreen.TitleText, - name="Learning Rate:", - value=str(saved_args.get('learning_rate','5.0e-04'),) - ) - self.scale_lr = self.add_widget_intelligent( - npyscreen.Checkbox, - name="Scale learning rate by number GPUs, steps and batch size", - value=saved_args.get('scale_lr',True), - ) - self.enable_xformers_memory_efficient_attention 
# Methods of the textual-inversion settings form. ``self`` is the form
# instance; the widgets read here (``self.model``, ``self.placeholder_token``,
# ...) are the ones assigned in ``create()``.
# NOTE(review): the class header lies outside this span -- confirm placement.


def initializer_changed(self):
    """Refresh the fields that are derived from the trigger term.

    Rewrites the prompt hint, the training-data directory and the output
    directory from the current placeholder token, then sets the "resume"
    checkbox value according to whether that output directory exists.
    """
    placeholder = self.placeholder_token.value
    root = Path(Globals.root)
    self.prompt_token.value = f'(Trigger by using <{placeholder}> in your prompts)'
    self.train_data_dir.value = str(root / TRAINING_DATA / placeholder)
    self.output_dir.value = str(root / TRAINING_DIR / placeholder)
    self.resume_from_checkpoint.value = Path(self.output_dir.value).exists()


def on_ok(self):
    """Handle the OK button.

    When validation fails the form stays in edit mode.  Otherwise the
    collected arguments are stored on the parent application as
    ``ti_arguments`` and the form is closed.
    """
    if not self.validate_field_values():
        self.editing = True
        return

    self.parentApp.setNextForm(None)
    self.editing = False
    self.parentApp.ti_arguments = self.marshall_arguments()
    npyscreen.notify('Launching textual inversion training. This will take a while...')
    # The module load takes a while, so we do it while the form and message are still up
    import ldm.invoke.textual_inversion_training  # noqa: F401


def ok_cancel(self):
    """Handle the Cancel button by exiting the process with status 0."""
    sys.exit(0)


def validate_field_values(self)->bool:
    """Check the user's input and report every problem in a single dialog.

    Returns:
        True when all checks pass.  Otherwise the problems are shown with
        ``npyscreen.notify_confirm`` and False is returned.
    """
    def is_blank(value) -> bool:
        # Reject None as well as empty / whitespace-only text.
        # NOTE(review): an untouched npyscreen text field presumably holds ''
        # rather than None, which an ``is None`` test lets through -- confirm.
        return value is None or not str(value).strip()

    bad_fields = []

    # marshall_arguments() indexes ``self.model.value[0]``, so an empty
    # selection list must be rejected here, not only None.
    if not self.model.value:
        bad_fields.append('Model Name must correspond to a known model in models.yaml')

    # fullmatch: '$' would accept a trailing newline; ``or ''`` guards None.
    if not re.fullmatch(r'[a-zA-Z0-9.-]+', self.placeholder_token.value or ''):
        bad_fields.append('Trigger term must only contain alphanumeric characters, the dot and hyphen')

    if is_blank(self.train_data_dir.value):
        bad_fields.append('Data Training Directory cannot be empty')

    if is_blank(self.output_dir.value):
        bad_fields.append('The Output Destination Directory cannot be empty')

    # marshall_arguments() converts this field with float(); catch a bad
    # value here instead of crashing after the form has been accepted.
    try:
        float(self.learning_rate.value)
    except (TypeError, ValueError):
        bad_fields.append('Learning Rate must be a number')

    if not bad_fields:
        return True

    message = 'The following problems were detected and must be corrected:'
    message += ''.join(f'\n* {problem}' for problem in bad_fields)
    npyscreen.notify_confirm(message)
    return False


def get_model_names(self)->(List[str],int):
    """List the selectable models and the index of the default one.

    Reads ``configs/models.yaml`` under ``Globals.root``.

    Returns:
        ``(model_names, default_index)``: the sorted names of all entries
        whose ``format`` is ``'diffusers'``, and the position of the first
        one that has a ``default`` key.  Falls back to index 0 when no entry
        has that key (previously an IndexError).
    """
    conf = OmegaConf.load(os.path.join(Globals.root,'configs/models.yaml'))
    model_names = [
        name for name in sorted(conf.keys())
        if conf[name].get('format',None)=='diffusers'
    ]
    default_index = next(
        (idx for idx, name in enumerate(model_names) if 'default' in conf[name]),
        0,
    )
    return (model_names,default_index)


def marshall_arguments(self)->dict:
    """Collect the widget values into a dict of training arguments.

    Selection widgets are read as ``value[0]`` (an index into the matching
    choices list), the slider values are converted to ``int`` and the
    learning rate to ``float``.  ``resume_from_checkpoint`` is only present
    (as ``'latest'``) when the box is ticked and the output directory exists.
    """
    # the choices
    args = {
        'model': self.model_names[self.model.value[0]],
        'resolution': self.resolutions[self.resolution.value[0]],
        'lr_scheduler': self.lr_schedulers[self.lr_scheduler.value[0]],
        'mixed_precision': self.precisions[self.mixed_precision.value[0]],
        'learnable_property': self.learnable_properties[self.learnable_property.value[0]],
    }

    # all the strings and booleans
    verbatim_fields = (
        'initializer_token', 'placeholder_token', 'train_data_dir',
        'output_dir', 'scale_lr', 'center_crop',
        'enable_xformers_memory_efficient_attention',
    )
    for field_name in verbatim_fields:
        args[field_name] = getattr(self, field_name).value

    # all the integers
    integer_fields = (
        'train_batch_size', 'gradient_accumulation_steps',
        'num_train_epochs', 'max_train_steps', 'lr_warmup_steps',
    )
    for field_name in integer_fields:
        args[field_name] = int(getattr(self, field_name).value)

    # the floats (just one)
    args['learning_rate'] = float(self.learning_rate.value)

    # a special case
    if self.resume_from_checkpoint.value and Path(self.output_dir.value).exists():
        args['resume_from_checkpoint'] = 'latest'

    return args
class MyApplication(npyscreen.NPSAppManaged):
    """npyscreen application that hosts the textual inversion settings form.

    After ``run()`` returns, ``ti_arguments`` holds the training arguments
    stored by the form, or None if nothing was stored.
    """

    def __init__(self, saved_args=None):
        super().__init__()
        self.ti_arguments = None
        self.saved_args = saved_args

    def onStart(self):
        """Select the default theme and register the settings form as MAIN."""
        npyscreen.setTheme(npyscreen.Themes.DefaultTheme)
        self.main = self.addForm(
            'MAIN',
            textualInversionForm,
            name='Textual Inversion Settings',
            saved_args=self.saved_args,
        )


def copy_to_embeddings_folder(args:dict):
    '''
    Copy learned_embeds.bin into the embeddings folder, and offer to
    delete the full model and checkpoints.

    Reads ``args['output_dir']`` (where learned_embeds.bin is looked up) and
    ``args['placeholder_token']`` (its name, minus surrounding angle
    brackets, becomes the destination sub-directory under
    ``<root>/embeddings``).  Pressing Enter at the prompt counts as "yes".
    '''
    source = Path(args['output_dir'],'learned_embeds.bin')
    dest_dir_name = args['placeholder_token'].strip('<>')
    destination = Path(Globals.root,'embeddings',dest_dir_name)
    os.makedirs(destination,exist_ok=True)
    print(f'>> Training completed. Copying learned_embeds.bin into {str(destination)}')
    shutil.copy(source,destination)

    answer = input('Delete training logs and intermediate checkpoints? [y] ') or 'y'
    if answer.startswith(('y','Y')):
        shutil.rmtree(Path(args['output_dir']))
    else:
        print(f'>> Keeping {args["output_dir"]}')


def save_args(args:dict):
    '''
    Save the current argument values to an omegaconf file
    (CONF_FILE inside the TRAINING_DIR folder under Globals.root).
    '''
    dest_dir = Path(Globals.root) / TRAINING_DIR
    os.makedirs(dest_dir, exist_ok=True)
    OmegaConf.save(config=OmegaConf.create(args), f=dest_dir / CONF_FILE)


def previous_args()->dict:
    '''
    Get the previous arguments used.

    Returns the configuration loaded from the saved preferences file with
    the angle brackets stripped from ``placeholder_token``, or None when the
    file cannot be loaded or has no usable ``placeholder_token``.
    '''
    conf_file = Path(Globals.root) / TRAINING_DIR / CONF_FILE
    try:
        conf = OmegaConf.load(conf_file)
        conf['placeholder_token'] = conf['placeholder_token'].strip('<>')
    except Exception:
        # Best effort: a missing or unreadable preferences file just means
        # "no saved values".  Deliberately not a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate.
        conf = None

    return conf


def main():
    """Run the settings form and, if it produced arguments, the training."""
    parser = argparse.ArgumentParser(description='InvokeAI textual inversion training')
    parser.add_argument(
        '--root_dir','--root-dir',
        type=Path,
        default=Globals.root,
        help='Path to the invokeai runtime directory',
    )
    cli_args = parser.parse_args()
    global_set_root(cli_args.root_dir)

    myapplication = MyApplication(saved_args=previous_args())
    myapplication.run()

    from ldm.invoke.textual_inversion_training import do_textual_inversion_training

    args = myapplication.ti_arguments
    if not args:
        return

    os.makedirs(args['output_dir'],exist_ok=True)

    # Automatically add angle brackets around the trigger
    if not re.match('^<.+>$',args['placeholder_token']):
        args['placeholder_token'] = f"<{args['placeholder_token']}>"

    args['only_save_embeds'] = True
    save_args(args)

    try:
        print(f'DEBUG: args = {args}')
        do_textual_inversion_training(**args)
        copy_to_embeddings_folder(args)
    except Exception as e:
        print('** An exception occurred during training. The exception was:')
        print(str(e))
        print('** DETAILS:')
        print(traceback.format_exc())


if __name__ == '__main__':
    main()