fix location of textual_inversion script

2024-08-30 20:32:17 +00:00 · 2023-01-26 11:56:23 -05:00 · 2023-01-26 11:56:23 -05:00 · d3a469d136
commit d3a469d136
parent 61403fe306
6 changed files with 609 additions and 515 deletions
--- a/docs/features/TEXTUAL_INVERSION.md
+++ b/docs/features/TEXTUAL_INVERSION.md
@ -55,7 +55,7 @@ Please enter 1, 2, 3, or 4: [1] 3

 From the command line, with the InvokeAI virtual environment active,
 you can launch the front end with the command
-`textual_inversion_fe`.
+`textual_inversion --gui`.

 This will launch a text-based front end that will look like this:

@ -219,11 +219,9 @@ term. For example `a plate of banana sushi in <psychedelic> style`.

 ## **Training with the Command-Line Script**

-InvokeAI also comes with a traditional command-line script for
-launching textual inversion training. It is named
-`textual_inversion`, and can be launched from within the
-"developer's console", or from the command line after activating
-InvokeAI's virtual environment.
+Training can also be done using the traditional command-line script,
+`textual_inversion`. It can be launched from within the "developer's
+console", or from the command line after activating InvokeAI's virtual
+environment.

 It accepts a large number of arguments, which can be summarized by
 passing the `--help` argument:
--- a/ldm/invoke/textual_inversion.py
+++ b/ldm/invoke/textual_inversion.py
@ -0,0 +1,414 @@
+#!/usr/bin/env python
+
+import argparse
+import curses
+import os
+import re
+import shutil
+import sys
+import traceback
+from argparse import Namespace
+from pathlib import Path
+from typing import List
+
+import npyscreen
+from omegaconf import OmegaConf
+
+from ldm.invoke.globals import Globals, global_set_root
+from ldm.invoke.textual_inversion_training import (
+    do_textual_inversion_training,
+    parse_args,
+)
+
+TRAINING_DATA = "text-inversion-training-data"
+TRAINING_DIR = "text-inversion-output"
+CONF_FILE = "preferences.conf"
+
+
+class textualInversionForm(npyscreen.FormMultiPageAction):
+    resolutions = [512, 768, 1024]
+    lr_schedulers = [
+        "linear",
+        "cosine",
+        "cosine_with_restarts",
+        "polynomial",
+        "constant",
+        "constant_with_warmup",
+    ]
+    precisions = ["no", "fp16", "bf16"]
+    learnable_properties = ["object", "style"]
+
+    def __init__(self, parentApp, name, saved_args=None):
+        """Remember previously saved settings, then construct the form.
+
+        A falsy saved_args (None or an empty mapping) is replaced by an
+        empty dict so later lookups can always call .get() on it.
+        """
+        # Assigned before the base-class constructor runs because create()
+        # reads self.saved_args (presumably the base constructor invokes
+        # create() -- TODO confirm against npyscreen).
+        self.saved_args = saved_args if saved_args else {}
+        super().__init__(parentApp, name)
+
+    def afterEditing(self):
+        """Tell the parent application that no form follows this one."""
+        self.parentApp.setNextForm(None)
+
+    def create(self):
+        """Build every widget of the settings form.
+
+        Most widgets take their initial value from self.saved_args when a
+        value is stored there; otherwise a built-in default is used. For
+        the selection lists, a saved value that is not one of the offered
+        choices is ignored in favour of the built-in default.
+        """
+        self.model_names, default = self.get_model_names()
+        default_initializer_token = "★"
+        default_placeholder_token = ""
+        saved_args = self.saved_args
+
+        def saved_choice(choices, key, fallback):
+            """Return the index within choices of the saved value for key.
+
+            Uses fallback when nothing was saved under key or when the
+            saved value is not among choices, so a stale preferences file
+            cannot raise ValueError while the form is being built.
+            """
+            value = saved_args.get(key, fallback)
+            if value not in choices:
+                value = fallback
+            return choices.index(value)
+
+        # Preselect the previously used model only if it is still listed;
+        # otherwise keep the default reported by get_model_names().
+        saved_model = saved_args.get("model")
+        if saved_model in self.model_names:
+            default = self.model_names.index(saved_model)
+
+        self.add_widget_intelligent(
+            npyscreen.FixedText,
+            value="Use ctrl-N and ctrl-P to move to the <N>ext and <P>revious fields, cursor arrows to make a selection, and space to toggle checkboxes.",
+        )
+
+        self.model = self.add_widget_intelligent(
+            npyscreen.TitleSelectOne,
+            name="Model Name:",
+            values=self.model_names,
+            value=default,
+            max_height=len(self.model_names) + 1,
+        )
+        self.placeholder_token = self.add_widget_intelligent(
+            npyscreen.TitleText,
+            name="Trigger Term:",
+            value="",  # saved_args.get('placeholder_token',''), # to restore previous term
+        )
+        # Editing the trigger term refreshes the prompt hint and both
+        # directory fields (see initializer_changed).
+        self.placeholder_token.when_value_edited = self.initializer_changed
+        # Step back one row and shift right so the hint is placed beside
+        # the trigger-term field; the x offset is undone afterwards.
+        self.nextrely -= 1
+        self.nextrelx += 30
+        self.prompt_token = self.add_widget_intelligent(
+            npyscreen.FixedText,
+            name="Trigger term for use in prompt",
+            value="",
+        )
+        self.nextrelx -= 30
+        self.initializer_token = self.add_widget_intelligent(
+            npyscreen.TitleText,
+            name="Initializer:",
+            value=saved_args.get("initializer_token", default_initializer_token),
+        )
+        self.resume_from_checkpoint = self.add_widget_intelligent(
+            npyscreen.Checkbox,
+            name="Resume from last saved checkpoint",
+            value=False,
+        )
+        self.learnable_property = self.add_widget_intelligent(
+            npyscreen.TitleSelectOne,
+            name="Learnable property:",
+            values=self.learnable_properties,
+            value=saved_choice(
+                self.learnable_properties, "learnable_property", "object"
+            ),
+            max_height=4,
+        )
+        self.train_data_dir = self.add_widget_intelligent(
+            npyscreen.TitleFilename,
+            name="Data Training Directory:",
+            select_dir=True,
+            must_exist=False,
+            value=str(
+                saved_args.get(
+                    "train_data_dir",
+                    Path(Globals.root) / TRAINING_DATA / default_placeholder_token,
+                )
+            ),
+        )
+        self.output_dir = self.add_widget_intelligent(
+            npyscreen.TitleFilename,
+            name="Output Destination Directory:",
+            select_dir=True,
+            must_exist=False,
+            value=str(
+                saved_args.get(
+                    "output_dir",
+                    Path(Globals.root) / TRAINING_DIR / default_placeholder_token,
+                )
+            ),
+        )
+        self.resolution = self.add_widget_intelligent(
+            npyscreen.TitleSelectOne,
+            name="Image resolution (pixels):",
+            values=self.resolutions,
+            value=saved_choice(self.resolutions, "resolution", 512),
+            scroll_exit=True,
+            max_height=4,
+        )
+        self.center_crop = self.add_widget_intelligent(
+            npyscreen.Checkbox,
+            name="Center crop images before resizing to resolution",
+            value=saved_args.get("center_crop", False),
+        )
+        self.mixed_precision = self.add_widget_intelligent(
+            npyscreen.TitleSelectOne,
+            name="Mixed Precision:",
+            values=self.precisions,
+            value=saved_choice(self.precisions, "mixed_precision", "fp16"),
+            max_height=4,
+        )
+        self.num_train_epochs = self.add_widget_intelligent(
+            npyscreen.TitleSlider,
+            name="Number of training epochs:",
+            out_of=1000,
+            step=50,
+            lowest=1,
+            value=saved_args.get("num_train_epochs", 100),
+        )
+        self.max_train_steps = self.add_widget_intelligent(
+            npyscreen.TitleSlider,
+            name="Max Training Steps:",
+            out_of=10000,
+            step=500,
+            lowest=1,
+            value=saved_args.get("max_train_steps", 3000),
+        )
+        self.train_batch_size = self.add_widget_intelligent(
+            npyscreen.TitleSlider,
+            name="Batch Size (reduce if you run out of memory):",
+            out_of=50,
+            step=1,
+            lowest=1,
+            value=saved_args.get("train_batch_size", 8),
+        )
+        self.gradient_accumulation_steps = self.add_widget_intelligent(
+            npyscreen.TitleSlider,
+            name="Gradient Accumulation Steps (may need to decrease this to resume from a checkpoint):",
+            out_of=10,
+            step=1,
+            lowest=1,
+            value=saved_args.get("gradient_accumulation_steps", 4),
+        )
+        self.lr_warmup_steps = self.add_widget_intelligent(
+            npyscreen.TitleSlider,
+            name="Warmup Steps:",
+            out_of=100,
+            step=1,
+            lowest=0,
+            value=saved_args.get("lr_warmup_steps", 0),
+        )
+        self.learning_rate = self.add_widget_intelligent(
+            npyscreen.TitleText,
+            name="Learning Rate:",
+            value=str(
+                saved_args.get("learning_rate", "5.0e-04"),
+            ),
+        )
+        self.scale_lr = self.add_widget_intelligent(
+            npyscreen.Checkbox,
+            name="Scale learning rate by number GPUs, steps and batch size",
+            value=saved_args.get("scale_lr", True),
+        )
+        self.enable_xformers_memory_efficient_attention = self.add_widget_intelligent(
+            npyscreen.Checkbox,
+            name="Use xformers acceleration",
+            value=saved_args.get("enable_xformers_memory_efficient_attention", False),
+        )
+        self.lr_scheduler = self.add_widget_intelligent(
+            npyscreen.TitleSelectOne,
+            name="Learning rate scheduler:",
+            values=self.lr_schedulers,
+            max_height=7,
+            scroll_exit=True,
+            value=saved_choice(self.lr_schedulers, "lr_scheduler", "constant"),
+        )
+
+    def initializer_changed(self):
+        """Refresh the widgets that are derived from the trigger term.
+
+        Rewrites the prompt hint, points both directory fields at the
+        trigger-specific subfolders under the runtime root, and sets the
+        resume checkbox to whether the output directory already exists.
+        """
+        trigger = self.placeholder_token.value
+        self.prompt_token.value = f"(Trigger by using <{trigger}> in your prompts)"
+        root = Path(Globals.root)
+        self.train_data_dir.value = str(root / TRAINING_DATA / trigger)
+        self.output_dir.value = str(root / TRAINING_DIR / trigger)
+        # Read the path back from the widget rather than reusing the local
+        # string, exactly as the field will report it later.
+        self.resume_from_checkpoint.value = Path(self.output_dir.value).exists()
+
+    def on_ok(self):
+        """Handle the OK action.
+
+        When validation fails the form stays in editing mode. Otherwise
+        the form is closed, the collected settings are stored on the
+        parent application as ti_arguments, and a notice is displayed.
+        """
+        if not self.validate_field_values():
+            # Keep the form open so the reported problems can be corrected.
+            self.editing = True
+            return
+
+        self.parentApp.setNextForm(None)
+        self.editing = False
+        self.parentApp.ti_arguments = self.marshall_arguments()
+        npyscreen.notify(
+            "Launching textual inversion training. This will take a while..."
+        )
+
+    def ok_cancel(self):
+        """Terminate the program with exit status 0."""
+        # NOTE(review): the OK hook in this class is named on_ok; if the
+        # framework's cancel hook is on_cancel, this method is never invoked
+        # by the form -- confirm against the npyscreen documentation.
+        sys.exit(0)
+
+    def validate_field_values(self) -> bool:
+        """Check the form's fields and report any problems to the user.
+
+        Returns True when every check passes. Otherwise shows a
+        confirmation dialog listing each problem and returns False.
+        """
+        bad_fields = []
+        # Reject an empty selection as well as None: marshall_arguments()
+        # indexes self.model.value[0].
+        if self.model.value in (None, []):
+            bad_fields.append(
+                "Model Name must correspond to a known model in models.yaml"
+            )
+        # "or ''" keeps re.match from raising TypeError on a None value.
+        if not re.match(r"^[a-zA-Z0-9.-]+$", self.placeholder_token.value or ""):
+            bad_fields.append(
+                "Trigger term must only contain alphanumeric characters, the dot and hyphen"
+            )
+        # Truthiness catches both None and the empty string.
+        if not self.train_data_dir.value:
+            bad_fields.append("Data Training Directory cannot be empty")
+        if not self.output_dir.value:
+            bad_fields.append("The Output Destination Directory cannot be empty")
+        # marshall_arguments() converts this field with float(); catch bad
+        # input here so it is reported instead of raising after OK.
+        try:
+            float(self.learning_rate.value)
+        except (TypeError, ValueError):
+            bad_fields.append("Learning Rate must be a number")
+
+        if not bad_fields:
+            return True
+
+        message = "The following problems were detected and must be corrected:"
+        message += "".join(f"\n* {problem}" for problem in bad_fields)
+        npyscreen.notify_confirm(message)
+        return False
+
+    def get_model_names(self) -> (List[str], int):
+        """List the diffusers-format models and pick the default one.
+
+        Reads configs/models.yaml under the runtime root. Returns a tuple
+        of (sorted names of the models whose "format" is "diffusers",
+        index of the first of those models that has a "default" key).
+        The index is 0 when none of them has a "default" key.
+        """
+        conf = OmegaConf.load(os.path.join(Globals.root, "configs/models.yaml"))
+        model_names = [
+            name
+            for name in sorted(conf.keys())
+            if conf[name].get("format", None) == "diffusers"
+        ]
+        defaults = [
+            idx for idx, name in enumerate(model_names) if "default" in conf[name]
+        ]
+        # Fall back to the first entry instead of raising IndexError when no
+        # diffusers model carries a "default" key.
+        return (model_names, defaults[0] if defaults else 0)
+
+    def marshall_arguments(self) -> dict:
+        """Collect the current widget values into a plain dict.
+
+        Selection widgets are translated from the selected index to the
+        corresponding choice, slider values are converted to int and the
+        learning rate to float. "resume_from_checkpoint" is set to
+        "latest" only when the checkbox is ticked and the output
+        directory exists.
+        """
+
+        def chosen(options, widget):
+            # Selection widgets report a list; the first entry is the index.
+            return options[widget.value[0]]
+
+        # the choices
+        args = {
+            "model": chosen(self.model_names, self.model),
+            "resolution": chosen(self.resolutions, self.resolution),
+            "lr_scheduler": chosen(self.lr_schedulers, self.lr_scheduler),
+            "mixed_precision": chosen(self.precisions, self.mixed_precision),
+            "learnable_property": chosen(
+                self.learnable_properties, self.learnable_property
+            ),
+        }
+
+        # all the strings and booleans, copied as-is
+        verbatim = (
+            "initializer_token",
+            "placeholder_token",
+            "train_data_dir",
+            "output_dir",
+            "scale_lr",
+            "center_crop",
+            "enable_xformers_memory_efficient_attention",
+        )
+        args.update({attr: getattr(self, attr).value for attr in verbatim})
+
+        # all the integers
+        whole_numbers = (
+            "train_batch_size",
+            "gradient_accumulation_steps",
+            "num_train_epochs",
+            "max_train_steps",
+            "lr_warmup_steps",
+        )
+        args.update(
+            {attr: int(getattr(self, attr).value) for attr in whole_numbers}
+        )
+
+        # the floats (just one)
+        args["learning_rate"] = float(self.learning_rate.value)
+
+        # a special case
+        if self.resume_from_checkpoint.value and Path(self.output_dir.value).exists():
+            args["resume_from_checkpoint"] = "latest"
+
+        return args
+
+
+class MyApplication(npyscreen.NPSAppManaged):
+    """Application object that hosts the textual inversion settings form."""
+
+    def __init__(self, saved_args=None):
+        """Keep the saved settings; ti_arguments stays None until the form fills it in."""
+        super().__init__()
+        self.saved_args = saved_args
+        self.ti_arguments = None
+
+    def onStart(self):
+        """Select the default theme and register the settings form as "MAIN"."""
+        npyscreen.setTheme(npyscreen.Themes.DefaultTheme)
+        form_options = dict(
+            name="Textual Inversion Settings",
+            saved_args=self.saved_args,
+        )
+        self.main = self.addForm("MAIN", textualInversionForm, **form_options)
+
+
+def copy_to_embeddings_folder(args: dict):
+    """
+    Copy learned_embeds.bin from the output directory into the embeddings
+    folder named after the trigger term (angle brackets stripped), then
+    ask whether to delete the output directory. An empty answer counts
+    as "y".
+    """
+    output_dir = args["output_dir"]
+    source = Path(output_dir, "learned_embeds.bin")
+    trigger_folder = args["placeholder_token"].strip("<>")
+    destination = Path(Globals.root, "embeddings", trigger_folder)
+    os.makedirs(destination, exist_ok=True)
+    print(f">> Training completed. Copying learned_embeds.bin into {str(destination)}")
+    shutil.copy(source, destination)
+
+    answer = input("Delete training logs and intermediate checkpoints? [y] ") or "y"
+    if not answer.startswith(("y", "Y")):
+        print(f'>> Keeping {args["output_dir"]}')
+        return
+    shutil.rmtree(Path(output_dir))
+
+
+def save_args(args: dict):
+    """
+    Write the given argument values to the preferences file inside the
+    training output directory, creating that directory if necessary.
+    """
+    prefs_dir = Path(Globals.root) / TRAINING_DIR
+    os.makedirs(prefs_dir, exist_ok=True)
+    OmegaConf.save(config=OmegaConf.create(args), f=prefs_dir / CONF_FILE)
+
+
+def previous_args() -> dict:
+    """
+    Get the previous arguments used.
+
+    Loads the preferences file from the training output directory and
+    strips angle brackets from the stored placeholder token. Returns None
+    when the file cannot be loaded or the placeholder token cannot be
+    processed.
+    """
+    conf_file = Path(Globals.root) / TRAINING_DIR / CONF_FILE
+    try:
+        conf = OmegaConf.load(conf_file)
+        conf["placeholder_token"] = conf["placeholder_token"].strip("<>")
+    except Exception:
+        # Best effort: missing or unusable preferences just mean "no saved
+        # values". Catching Exception rather than everything lets
+        # KeyboardInterrupt and SystemExit propagate.
+        conf = None
+
+    return conf
+
+
+def do_front_end(args: Namespace):
+    """
+    Run the text-based front end and train with the settings it collects.
+
+    The form is pre-filled from the previously saved arguments. If the
+    form produced no settings, nothing else happens. Otherwise the output
+    directory is created, the trigger term is wrapped in angle brackets
+    when it is not already, the settings are saved, training is run and
+    the learned embedding is copied into the embeddings folder. Any
+    exception raised by training or copying is printed with its traceback
+    instead of being propagated.
+
+    The args parameter (the parsed command line) is not used here.
+    """
+    saved_args = previous_args()
+    myapplication = MyApplication(saved_args=saved_args)
+    myapplication.run()
+
+    # A separate name keeps the command-line namespace from being shadowed.
+    ti_args = myapplication.ti_arguments
+    if not ti_args:
+        return
+
+    os.makedirs(ti_args["output_dir"], exist_ok=True)
+
+    # Automatically add angle brackets around the trigger
+    if not re.match("^<.+>$", ti_args["placeholder_token"]):
+        ti_args["placeholder_token"] = f"<{ti_args['placeholder_token']}>"
+
+    ti_args["only_save_embeds"] = True
+    save_args(ti_args)
+
+    try:
+        do_textual_inversion_training(**ti_args)
+        copy_to_embeddings_folder(ti_args)
+    except Exception as e:
+        print("** An exception occurred during training. The exception was:")
+        print(str(e))
+        print("** DETAILS:")
+        print(traceback.format_exc())
+
+def main():
+    """
+    Entry point: parse the command line, set the runtime root, then run
+    either the front end (when front_end is set) or training directly
+    with the parsed arguments. An AssertionError raised by either path is
+    reported by printing its message.
+    """
+    args = parse_args()
+    global_set_root(args.root_dir or Globals.root)
+    try:
+        if not args.front_end:
+            do_textual_inversion_training(**vars(args))
+        else:
+            do_front_end(args)
+    except AssertionError as err:
+        print(str(err))
--- a/ldm/invoke/textual_inversion_training.py
+++ b/ldm/invoke/textual_inversion_training.py
@ -31,6 +31,10 @@ from diffusers.utils.import_utils import is_xformers_available
 from huggingface_hub import HfFolder, Repository, whoami

 # invokeai stuff
+from ldm.invoke.args import (
+    PagingArgumentParser,
+    ArgFormatter
+)
 from ldm.invoke.globals import Globals, global_cache_dir
 from omegaconf import OmegaConf

@ -74,145 +78,32 @@ def save_progress(text_encoder, placeholder_token_id, accelerator, placeholder_t
    torch.save(learned_embeds_dict, save_path)

 def parse_args():
-    parser = argparse.ArgumentParser(description="Simple example of a training script.")
-    parser.add_argument(
-        "--save_steps",
-        type=int,
-        default=500,
-        help="Save learned_embeds.bin every X updates steps.",
+    parser = PagingArgumentParser(
+        description="Textual inversion training",
+        formatter_class=ArgFormatter
    )
-    parser.add_argument(
+    general_group = parser.add_argument_group('General')
+    model_group = parser.add_argument_group('Models and Paths')
+    image_group = parser.add_argument_group('Training Image Location and Options')
+    trigger_group = parser.add_argument_group('Trigger Token')
+    training_group = parser.add_argument_group('Training Parameters')
+    checkpointing_group = parser.add_argument_group('Checkpointing and Resume')
+    integration_group = parser.add_argument_group('Integration')
+    general_group.add_argument(
+        '--front_end',
+        '--gui',
+        dest='front_end',
+        action="store_true",
+        default=False,
+        help="Activate the text-based graphical front end for collecting parameters. Other parameters will be ignored."
+    )
+    general_group.add_argument(
        '--root_dir','--root',
        type=Path,
        default=Globals.root,
        help="Path to the invokeai runtime directory",
    )
-    parser.add_argument(
-        "--only_save_embeds",
-        action="store_true",
-        default=False,
-        help="Save only the embeddings for the new concept.",
-    )
-    parser.add_argument(
-        "--model",
-        type=str,
-        default=None,
-        required=True,
-        help="Name of the diffusers model to train against, as defined in configs/models.yaml.",
-    )
-    parser.add_argument(
-        "--revision",
-        type=str,
-        default=None,
-        required=False,
-        help="Revision of pretrained model identifier from huggingface.co/models.",
-    )
-    parser.add_argument(
-        "--tokenizer_name",
-        type=str,
-        default=None,
-        help="Pretrained tokenizer name or path if not the same as model_name",
-    )
-    parser.add_argument(
-        "--train_data_dir",
-        type=Path,
-        default=None,
-        required=True,
-        help="A folder containing the training data."
-    )
-    parser.add_argument(
-        "--placeholder_token",
-        type=str,
-        default=None,
-        required=True,
-        help="A token to use as a placeholder for the concept.",
-    )
-    parser.add_argument(
-        "--initializer_token",
-        type=str,
-        default=None,
-        required=False,
-        help="A token to use as initializer word."
-    )
-    parser.add_argument("--learnable_property", type=str, default="object", help="Choose between 'object' and 'style'")
-    parser.add_argument("--repeats", type=int, default=100, help="How many times to repeat the training data.")
-    parser.add_argument(
-        "--output_dir",
-        type=Path,
-        default=f'{Globals.root}/text-inversion-model',
-        help="The output directory where the model predictions and checkpoints will be written.",
-    )
-    parser.add_argument("--seed", type=int, default=None, help="A seed for reproducible training.")
-    parser.add_argument(
-        "--resolution",
-        type=int,
-        default=512,
-        help=(
-            "The resolution for input images, all the images in the train/validation dataset will be resized to this"
-            " resolution"
-        ),
-    )
-    parser.add_argument(
-        "--center_crop", action="store_true", help="Whether to center crop images before resizing to resolution"
-    )
-    parser.add_argument(
-        "--train_batch_size", type=int, default=16, help="Batch size (per device) for the training dataloader."
-    )
-    parser.add_argument("--num_train_epochs", type=int, default=100)
-    parser.add_argument(
-        "--max_train_steps",
-        type=int,
-        default=5000,
-        help="Total number of training steps to perform.  If provided, overrides num_train_epochs.",
-    )
-    parser.add_argument(
-        "--gradient_accumulation_steps",
-        type=int,
-        default=1,
-        help="Number of updates steps to accumulate before performing a backward/update pass.",
-    )
-    parser.add_argument(
-        "--gradient_checkpointing",
-        action="store_true",
-        help="Whether or not to use gradient checkpointing to save memory at the expense of slower backward pass.",
-    )
-    parser.add_argument(
-        "--learning_rate",
-        type=float,
-        default=1e-4,
-        help="Initial learning rate (after the potential warmup period) to use.",
-    )
-    parser.add_argument(
-        "--scale_lr",
-        action="store_true",
-        default=True,
-        help="Scale the learning rate by the number of GPUs, gradient accumulation steps, and batch size.",
-    )
-    parser.add_argument(
-        "--lr_scheduler",
-        type=str,
-        default="constant",
-        help=(
-            'The scheduler type to use. Choose between ["linear", "cosine", "cosine_with_restarts", "polynomial",'
-            ' "constant", "constant_with_warmup"]'
-        ),
-    )
-    parser.add_argument(
-        "--lr_warmup_steps", type=int, default=500, help="Number of steps for the warmup in the lr scheduler."
-    )
-    parser.add_argument("--adam_beta1", type=float, default=0.9, help="The beta1 parameter for the Adam optimizer.")
-    parser.add_argument("--adam_beta2", type=float, default=0.999, help="The beta2 parameter for the Adam optimizer.")
-    parser.add_argument("--adam_weight_decay", type=float, default=1e-2, help="Weight decay to use.")
-    parser.add_argument("--adam_epsilon", type=float, default=1e-08, help="Epsilon value for the Adam optimizer")
-    parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.")
-    parser.add_argument("--hub_token", type=str, default=None, help="The token to use to push to the Model Hub.")
-    parser.add_argument(
-        "--hub_model_id",
-        type=str,
-        default=None,
-        help="The name of the repository to keep in sync with the local `output_dir`.",
-    )
-    parser.add_argument(
+    general_group.add_argument(
        "--logging_dir",
        type=Path,
        default="logs",
@ -221,7 +112,147 @@ def parse_args():
            " *output_dir/runs/**CURRENT_DATETIME_HOSTNAME***."
        ),
    )
-    parser.add_argument(
+    general_group.add_argument(
+        "--output_dir",
+        type=Path,
+        default=f'{Globals.root}/text-inversion-model',
+        help="The output directory where the model predictions and checkpoints will be written.",
+    )
+    model_group.add_argument(
+        "--model",
+        type=str,
+        default='stable-diffusion-1.5',
+        help="Name of the diffusers model to train against, as defined in configs/models.yaml.",
+    )
+    model_group.add_argument(
+        "--revision",
+        type=str,
+        default=None,
+        required=False,
+        help="Revision of pretrained model identifier from huggingface.co/models.",
+    )
+    
+    model_group.add_argument(
+        "--tokenizer_name",
+        type=str,
+        default=None,
+        help="Pretrained tokenizer name or path if not the same as model_name",
+    )
+    image_group.add_argument(
+        "--train_data_dir",
+        type=Path,
+        default=None,
+        help="A folder containing the training data."
+    )
+    image_group.add_argument(
+        "--resolution",
+        type=int,
+        default=512,
+        help=(
+            "The resolution for input images, all the images in the train/validation dataset will be resized to this"
+            " resolution"
+        ),
+    )
+    image_group.add_argument(
+        "--center_crop", action="store_true", help="Whether to center crop images before resizing to resolution"
+    )
+    trigger_group.add_argument(
+        "--placeholder_token",
+        "--trigger_term",
+        dest='placeholder_token',
+        type=str,
+        default=None,
+        help="A token to use as a placeholder for the concept. This token will trigger the concept when included in the prompt as \"<trigger>\".",
+    )
+    trigger_group.add_argument(
+        "--learnable_property",
+        type=str,
+        choices=['object','style'],
+        default="object",
+        help="Choose between 'object' and 'style'"
+    )
+    trigger_group.add_argument(
+        "--initializer_token",
+        type=str,
+        default='*',
+        help="A symbol to use as the initializer word."
+    )
+    checkpointing_group.add_argument(
+        "--checkpointing_steps",
+        type=int,
+        default=500,
+        help=(
+            "Save a checkpoint of the training state every X updates. These checkpoints are only suitable for resuming"
+            " training using `--resume_from_checkpoint`."
+        ),
+    )
+    checkpointing_group.add_argument(
+        "--resume_from_checkpoint",
+        type=Path,
+        default=None,
+        help=(
+            "Whether training should be resumed from a previous checkpoint. Use a path saved by"
+            ' `--checkpointing_steps`, or `"latest"` to automatically select the last available checkpoint.'
+        ),
+    )
+    checkpointing_group.add_argument(
+        "--save_steps",
+        type=int,
+        default=500,
+        help="Save learned_embeds.bin every X updates steps.",
+    )
+    training_group.add_argument("--repeats", type=int, default=100, help="How many times to repeat the training data.")
+    training_group.add_argument("--seed", type=int, default=None, help="A seed for reproducible training.")
+    training_group.add_argument(
+        "--train_batch_size", type=int, default=16, help="Batch size (per device) for the training dataloader."
+    )
+    training_group.add_argument("--num_train_epochs", type=int, default=100)
+    training_group.add_argument(
+        "--max_train_steps",
+        type=int,
+        default=5000,
+        help="Total number of training steps to perform.  If provided, overrides num_train_epochs.",
+    )
+    training_group.add_argument(
+        "--gradient_accumulation_steps",
+        type=int,
+        default=1,
+        help="Number of updates steps to accumulate before performing a backward/update pass.",
+    )
+    training_group.add_argument(
+        "--gradient_checkpointing",
+        action="store_true",
+        help="Whether or not to use gradient checkpointing to save memory at the expense of slower backward pass.",
+    )
+    training_group.add_argument(
+        "--learning_rate",
+        type=float,
+        default=1e-4,
+        help="Initial learning rate (after the potential warmup period) to use.",
+    )
+    training_group.add_argument(
+        "--scale_lr",
+        action="store_true",
+        default=True,
+        help="Scale the learning rate by the number of GPUs, gradient accumulation steps, and batch size.",
+    )
+    training_group.add_argument(
+        "--lr_scheduler",
+        type=str,
+        default="constant",
+        help=(
+            'The scheduler type to use. Choose between ["linear", "cosine", "cosine_with_restarts", "polynomial",'
+            ' "constant", "constant_with_warmup"]'
+        ),
+    )
+    training_group.add_argument(
+        "--lr_warmup_steps", type=int, default=500, help="Number of steps for the warmup in the lr scheduler."
+    )
+    training_group.add_argument("--adam_beta1", type=float, default=0.9, help="The beta1 parameter for the Adam optimizer.")
+    training_group.add_argument("--adam_beta2", type=float, default=0.999, help="The beta2 parameter for the Adam optimizer.")
+    training_group.add_argument("--adam_weight_decay", type=float, default=1e-2, help="Weight decay to use.")
+    training_group.add_argument("--adam_epsilon", type=float, default=1e-08, help="Epsilon value for the Adam optimizer")
+    training_group.add_argument(
        "--mixed_precision",
        type=str,
        default="no",
@ -232,7 +263,7 @@ def parse_args():
            "and an Nvidia Ampere GPU."
        ),
    )
-    parser.add_argument(
+    training_group.add_argument(
        "--allow_tf32",
        action="store_true",
        help=(
@ -240,7 +271,24 @@ def parse_args():
            " https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices"
        ),
    )
+    training_group.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank")
    parser.add_argument(
+        "--enable_xformers_memory_efficient_attention", action="store_true", help="Whether or not to use xformers."
+    )
+
+    integration_group.add_argument(
+        "--only_save_embeds",
+        action="store_true",
+        default=False,
+        help="Save only the embeddings for the new concept.",
+    )
+    integration_group.add_argument(
+        "--hub_model_id",
+        type=str,
+        default=None,
+        help="The name of the repository to keep in sync with the local `output_dir`.",
+    )
+    integration_group.add_argument(
        "--report_to",
        type=str,
        default="tensorboard",
@ -249,29 +297,8 @@ def parse_args():
            ' (default), `"wandb"` and `"comet_ml"`. Use `"all"` to report to all integrations.'
        ),
    )
-    parser.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank")
-    parser.add_argument(
-        "--checkpointing_steps",
-        type=int,
-        default=500,
-        help=(
-            "Save a checkpoint of the training state every X updates. These checkpoints are only suitable for resuming"
-            " training using `--resume_from_checkpoint`."
-        ),
-    )
-    parser.add_argument(
-        "--resume_from_checkpoint",
-        type=Path,
-        default=None,
-        help=(
-            "Whether training should be resumed from a previous checkpoint. Use a path saved by"
-            ' `--checkpointing_steps`, or `"latest"` to automatically select the last available checkpoint.'
-        ),
-    )
-    parser.add_argument(
-        "--enable_xformers_memory_efficient_attention", action="store_true", help="Whether or not to use xformers."
-    )
-
+    integration_group.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.")
+    integration_group.add_argument("--hub_token", type=str, default=None, help="The token to use to push to the Model Hub.")
    args = parser.parse_args()
    return args

@ -462,7 +489,11 @@ def do_textual_inversion_training(
        enable_xformers_memory_efficient_attention:bool=False,
        root_dir:Path=None,
        hub_model_id:str=None,
+        **kwargs,
 ):
+    assert model, 'Please specify a base model with --model'
+    assert train_data_dir, 'Please specify a directory containing the training images using --train_data_dir'
+    assert placeholder_token, 'Please specify a trigger term using --placeholder_token'
    env_local_rank = int(os.environ.get("LOCAL_RANK", -1))
    if env_local_rank != -1 and env_local_rank != local_rank:
        local_rank = env_local_rank
--- a/pyproject.toml
+++ b/pyproject.toml
@ -98,6 +98,8 @@ test = ["pytest>6.0.0", "pytest-cov"]
 "load_models" = "scripts:configure_invokeai.main"
 "merge_embeddings" = "scripts:merge_embeddings.main"
 "preload_models" = "ldm.invoke.configure_invokeai:main"
+"textual_inversion" = "ldm.invoke.textual_inversion:main"
+"merge_models" = "ldm.invoke.merge_models:main"

 [project.urls]
 "Homepage" = "https://invoke-ai.github.io/InvokeAI/"
--- a/scripts/merge_models.py
+++ b/scripts/merge_models.py
@ -15,7 +15,6 @@ from ldm.invoke.model_manager import ModelManager
 parser = argparse.ArgumentParser(description="InvokeAI textual inversion training")
 parser.add_argument(
    "--root_dir",
-    "--root-dir",
    type=Path,
    default=Globals.root,
    help="Path to the invokeai runtime directory",
--- a/scripts/textual_inversion_fe.py
+++ b/scripts/textual_inversion_fe.py
@ -1,350 +0,0 @@
-#!/usr/bin/env python
-
-import npyscreen
-import os
-import sys
-import re
-import shutil
-import traceback
-import curses
-from ldm.invoke.globals import Globals, global_set_root
-from omegaconf import OmegaConf
-from pathlib import Path
-from typing import List
-import argparse
-
-TRAINING_DATA = 'text-inversion-training-data'
-TRAINING_DIR = 'text-inversion-output'
-CONF_FILE = 'preferences.conf'
-
-class textualInversionForm(npyscreen.FormMultiPageAction):
-    resolutions = [512, 768, 1024]
-    lr_schedulers = [
-                "linear", "cosine", "cosine_with_restarts",
-                "polynomial","constant", "constant_with_warmup"
-    ]
-    precisions = ['no','fp16','bf16']
-    learnable_properties = ['object','style']
-
-    def __init__(self, parentApp, name, saved_args=None):
-        self.saved_args = saved_args or {}
-        super().__init__(parentApp, name)
-
-    def afterEditing(self):
-        self.parentApp.setNextForm(None)
-
-    def create(self):
-        self.model_names, default = self.get_model_names()
-        default_initializer_token = '★'
-        default_placeholder_token = ''
-        saved_args = self.saved_args
-
-        try:
-            default = self.model_names.index(saved_args['model'])
-        except:
-            pass
-
-        self.add_widget_intelligent(
-            npyscreen.FixedText,
-            value='Use ctrl-N and ctrl-P to move to the <N>ext and <P>revious fields, cursor arrows to make a selection, and space to toggle checkboxes.'
-        )
-
-        self.model = self.add_widget_intelligent(
-            npyscreen.TitleSelectOne,
-            name='Model Name:',
-            values=self.model_names,
-            value=default,
-            max_height=len(self.model_names)+1
-        )
-        self.placeholder_token = self.add_widget_intelligent(
-            npyscreen.TitleText,
-            name='Trigger Term:',
-            value='', # saved_args.get('placeholder_token',''), # to restore previous term
-        )
-        self.placeholder_token.when_value_edited = self.initializer_changed
-        self.nextrely -= 1
-        self.nextrelx += 30
-        self.prompt_token = self.add_widget_intelligent(
-            npyscreen.FixedText,
-            name="Trigger term for use in prompt",
-            value='',
-        )
-        self.nextrelx -= 30
-        self.initializer_token = self.add_widget_intelligent(
-            npyscreen.TitleText,
-            name='Initializer:',
-            value=saved_args.get('initializer_token',default_initializer_token),
-        )
-        self.resume_from_checkpoint = self.add_widget_intelligent(
-            npyscreen.Checkbox,
-            name="Resume from last saved checkpoint",
-            value=False,
-        )
-        self.learnable_property = self.add_widget_intelligent(
-            npyscreen.TitleSelectOne,
-            name="Learnable property:",
-            values=self.learnable_properties,
-            value=self.learnable_properties.index(saved_args.get('learnable_property','object')),
-            max_height=4,
-        )
-        self.train_data_dir = self.add_widget_intelligent(
-            npyscreen.TitleFilename,
-            name='Data Training Directory:',
-            select_dir=True,
-            must_exist=False,
-            value=str(saved_args.get('train_data_dir',Path(Globals.root) / TRAINING_DATA / default_placeholder_token))
-        )
-        self.output_dir = self.add_widget_intelligent(
-            npyscreen.TitleFilename,
-            name='Output Destination Directory:',
-            select_dir=True,
-            must_exist=False,
-            value=str(saved_args.get('output_dir',Path(Globals.root) / TRAINING_DIR / default_placeholder_token))
-        )
-        self.resolution = self.add_widget_intelligent(
-            npyscreen.TitleSelectOne,
-            name='Image resolution (pixels):',
-            values = self.resolutions,
-            value=self.resolutions.index(saved_args.get('resolution',512)),
-            scroll_exit = True,
-            max_height=4,
-        )
-        self.center_crop = self.add_widget_intelligent(
-            npyscreen.Checkbox,
-            name="Center crop images before resizing to resolution",
-            value=saved_args.get('center_crop',False)
-        )
-        self.mixed_precision = self.add_widget_intelligent(
-            npyscreen.TitleSelectOne,
-            name='Mixed Precision:',
-            values=self.precisions,
-            value=self.precisions.index(saved_args.get('mixed_precision','fp16')),
-            max_height=4,
-        )
-        self.num_train_epochs = self.add_widget_intelligent(
-            npyscreen.TitleSlider,
-            name='Number of training epochs:',
-            out_of=1000,
-            step=50,
-            lowest=1,
-            value=saved_args.get('num_train_epochs',100)
-        )
-        self.max_train_steps = self.add_widget_intelligent(
-            npyscreen.TitleSlider,
-            name='Max Training Steps:',
-            out_of=10000,
-            step=500,
-            lowest=1,
-            value=saved_args.get('max_train_steps',3000)
-        )
-        self.train_batch_size = self.add_widget_intelligent(
-            npyscreen.TitleSlider,
-            name='Batch Size (reduce if you run out of memory):',
-            out_of=50,
-            step=1,
-            lowest=1,
-            value=saved_args.get('train_batch_size',8),
-        )
-        self.gradient_accumulation_steps = self.add_widget_intelligent(
-            npyscreen.TitleSlider,
-            name='Gradient Accumulation Steps (may need to decrease this to resume from a checkpoint):',
-            out_of=10,
-            step=1,
-            lowest=1,
-            value=saved_args.get('gradient_accumulation_steps',4)
-        )
-        self.lr_warmup_steps = self.add_widget_intelligent(
-            npyscreen.TitleSlider,
-            name='Warmup Steps:',
-            out_of=100,
-            step=1,
-            lowest=0,
-            value=saved_args.get('lr_warmup_steps',0),
-        )
-        self.learning_rate = self.add_widget_intelligent(
-            npyscreen.TitleText,
-            name="Learning Rate:",
-            value=str(saved_args.get('learning_rate','5.0e-04'),)
-        )
-        self.scale_lr = self.add_widget_intelligent(
-            npyscreen.Checkbox,
-            name="Scale learning rate by number GPUs, steps and batch size",
-            value=saved_args.get('scale_lr',True),
-        )
-        self.enable_xformers_memory_efficient_attention = self.add_widget_intelligent(
-            npyscreen.Checkbox,
-            name="Use xformers acceleration",
-            value=saved_args.get('enable_xformers_memory_efficient_attention',False),
-        )
-        self.lr_scheduler = self.add_widget_intelligent(
-            npyscreen.TitleSelectOne,
-            name='Learning rate scheduler:',
-            values = self.lr_schedulers,
-            max_height=7,
-            scroll_exit = True,
-            value=self.lr_schedulers.index(saved_args.get('lr_scheduler','constant')),
-        )
-
-    def initializer_changed(self):
-        placeholder = self.placeholder_token.value
-        self.prompt_token.value = f'(Trigger by using <{placeholder}> in your prompts)'
-        self.train_data_dir.value = str(Path(Globals.root) / TRAINING_DATA / placeholder)
-        self.output_dir.value = str(Path(Globals.root) / TRAINING_DIR / placeholder)
-        self.resume_from_checkpoint.value = Path(self.output_dir.value).exists()
-        
-    def on_ok(self):
-        if self.validate_field_values():
-            self.parentApp.setNextForm(None)
-            self.editing = False
-            self.parentApp.ti_arguments = self.marshall_arguments()
-            npyscreen.notify('Launching textual inversion training. This will take a while...')
-            # The module load takes a while, so we do it while the form and message are still up
-            import ldm.invoke.textual_inversion_training
-        else:
-            self.editing = True
-
-    def ok_cancel(self):
-        sys.exit(0)
-
-    def validate_field_values(self)->bool:
-        bad_fields = []
-        if self.model.value is None:
-            bad_fields.append('Model Name must correspond to a known model in models.yaml')
-        if not re.match('^[a-zA-Z0-9.-]+$',self.placeholder_token.value):
-            bad_fields.append('Trigger term must only contain alphanumeric characters, the dot and hyphen')
-        if self.train_data_dir.value is None:
-            bad_fields.append('Data Training Directory cannot be empty')
-        if self.output_dir.value is None:
-            bad_fields.append('The Output Destination Directory cannot be empty')
-        if len(bad_fields) > 0:
-            message = 'The following problems were detected and must be corrected:'
-            for problem in bad_fields:
-                message += f'\n* {problem}'
-            npyscreen.notify_confirm(message)
-            return False
-        else:
-            return True
-
-    def get_model_names(self)->(List[str],int):
-        conf = OmegaConf.load(os.path.join(Globals.root,'configs/models.yaml'))
-        model_names = [idx for idx in sorted(list(conf.keys())) if conf[idx].get('format',None)=='diffusers']
-        defaults = [idx for idx in range(len(model_names)) if 'default' in conf[model_names[idx]]]
-        return (model_names,defaults[0])
-
-    def marshall_arguments(self)->dict:
-        args = dict()
-
-        # the choices
-        args.update(
-            model = self.model_names[self.model.value[0]],
-            resolution = self.resolutions[self.resolution.value[0]],
-            lr_scheduler = self.lr_schedulers[self.lr_scheduler.value[0]],
-            mixed_precision = self.precisions[self.mixed_precision.value[0]],
-            learnable_property = self.learnable_properties[self.learnable_property.value[0]],
-        )
-
-        # all the strings and booleans
-        for attr in ('initializer_token','placeholder_token','train_data_dir',
-                     'output_dir','scale_lr','center_crop','enable_xformers_memory_efficient_attention'):
-            args[attr] = getattr(self,attr).value
-            
-        # all the integers
-        for attr in ('train_batch_size','gradient_accumulation_steps',
-                     'num_train_epochs','max_train_steps','lr_warmup_steps'):
-            args[attr] = int(getattr(self,attr).value)
-
-        # the floats (just one)
-        args.update(
-            learning_rate = float(self.learning_rate.value)
-        )
-
-        # a special case
-        if self.resume_from_checkpoint.value and Path(self.output_dir.value).exists():
-            args['resume_from_checkpoint'] = 'latest'
-
-        return args
-
-class MyApplication(npyscreen.NPSAppManaged):
-    def __init__(self, saved_args=None):
-        super().__init__()
-        self.ti_arguments=None
-        self.saved_args=saved_args
-
-    def onStart(self):
-        npyscreen.setTheme(npyscreen.Themes.DefaultTheme)
-        self.main = self.addForm('MAIN', textualInversionForm, name='Textual Inversion Settings', saved_args=self.saved_args)
-
-def copy_to_embeddings_folder(args:dict):
-    '''
-    Copy learned_embeds.bin into the embeddings folder, and offer to
-    delete the full model and checkpoints.
-    '''
-    source = Path(args['output_dir'],'learned_embeds.bin')
-    dest_dir_name = args['placeholder_token'].strip('<>')
-    destination = Path(Globals.root,'embeddings',dest_dir_name)
-    os.makedirs(destination,exist_ok=True)
-    print(f'>> Training completed. Copying learned_embeds.bin into {str(destination)}')
-    shutil.copy(source,destination)
-    if (input('Delete training logs and intermediate checkpoints? [y] ') or 'y').startswith(('y','Y')):
-        shutil.rmtree(Path(args['output_dir']))
-    else:
-        print(f'>> Keeping {args["output_dir"]}')
-
-def save_args(args:dict):
-    '''
-    Save the current argument values to an omegaconf file
-    '''
-    dest_dir = Path(Globals.root) / TRAINING_DIR
-    os.makedirs(dest_dir, exist_ok=True)
-    conf_file = dest_dir / CONF_FILE
-    conf = OmegaConf.create(args)
-    OmegaConf.save(config=conf, f=conf_file)
-
-def previous_args()->dict:
-    '''
-    Get the previous arguments used.
-    '''
-    conf_file = Path(Globals.root) / TRAINING_DIR / CONF_FILE
-    try:
-        conf = OmegaConf.load(conf_file)
-        conf['placeholder_token'] = conf['placeholder_token'].strip('<>')
-    except:
-        conf= None
-
-    return conf
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='InvokeAI textual inversion training')
-    parser.add_argument(
-        '--root_dir','--root-dir',
-        type=Path,
-        default=Globals.root,
-        help='Path to the invokeai runtime directory',
-    )
-    args = parser.parse_args()
-    global_set_root(args.root_dir)
-
-    saved_args = previous_args()
-    myapplication = MyApplication(saved_args=saved_args)
-    myapplication.run()
-    
-    from ldm.invoke.textual_inversion_training import do_textual_inversion_training
-    if args := myapplication.ti_arguments:
-        os.makedirs(args['output_dir'],exist_ok=True)
-        
-        # Automatically add angle brackets around the trigger
-        if not re.match('^<.+>$',args['placeholder_token']):
-            args['placeholder_token'] = f"<{args['placeholder_token']}>"
-
-        args['only_save_embeds'] = True
-        save_args(args)
-
-        try:
-            print(f'DEBUG: args = {args}')
-            do_textual_inversion_training(**args)
-            copy_to_embeddings_folder(args)
-        except Exception as e:
-            print('** An exception occurred during training. The exception was:')
-            print(str(e))
-            print('** DETAILS:')
-            print(traceback.format_exc())