mirror of
https://github.com/invoke-ai/InvokeAI
synced 2024-08-30 20:32:17 +00:00
Fix textual inversion documentation and code (#3015)
This PR addresses issues raised by #3008. 1. Update documentation to indicate the correct maximum batch size for TI training when xformers is and isn't used. 2. Update textual inversion code so that the default for batch size is aware of xformer availability. 3. Add documentation for how to launch TI with distributed learning.
This commit is contained in:
@ -154,8 +154,11 @@ training sets will converge with 2000-3000 steps.
|
|||||||
|
|
||||||
This adjusts how many training images are processed simultaneously in
|
This adjusts how many training images are processed simultaneously in
|
||||||
each step. Higher values will cause the training process to run more
|
each step. Higher values will cause the training process to run more
|
||||||
quickly, but use more memory. The default size will run with GPUs with
|
quickly, but use more memory. The default size is selected based on
|
||||||
as little as 12 GB.
|
whether you have the `xformers` memory-efficient attention library
|
||||||
|
installed. If `xformers` is available, the batch size will be 8,
|
||||||
|
otherwise 3. These values were chosen to allow training to run with
|
||||||
|
GPUs with as little as 12 GB VRAM.
|
||||||
|
|
||||||
### Learning rate
|
### Learning rate
|
||||||
|
|
||||||
@ -172,8 +175,10 @@ learning rate to improve performance.
|
|||||||
|
|
||||||
### Use xformers acceleration
|
### Use xformers acceleration
|
||||||
|
|
||||||
This will activate XFormers memory-efficient attention. You need to
|
This will activate XFormers memory-efficient attention, which will
|
||||||
have XFormers installed for this to have an effect.
|
reduce memory requirements by half or more and allow you to select a
|
||||||
|
higher batch size. You need to have XFormers installed for this to
|
||||||
|
have an effect.
|
||||||
|
|
||||||
### Learning rate scheduler
|
### Learning rate scheduler
|
||||||
|
|
||||||
@ -250,6 +255,49 @@ invokeai-ti \
|
|||||||
--only_save_embeds
|
--only_save_embeds
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Using Distributed Training
|
||||||
|
|
||||||
|
If you have multiple GPUs on one machine, or a cluster of GPU-enabled
|
||||||
|
machines, you can activate distributed training. See the [HuggingFace
|
||||||
|
Accelerate pages](https://huggingface.co/docs/accelerate/index) for
|
||||||
|
full information, but the basic recipe is:
|
||||||
|
|
||||||
|
1. Enter the InvokeAI developer's console command line by selecting
|
||||||
|
option [8] from the `invoke.sh`/`invoke.bat` script.
|
||||||
|
|
||||||
|
2. Configure Accelerate using `accelerate config`:
|
||||||
|
```sh
|
||||||
|
accelerate config
|
||||||
|
```
|
||||||
|
This will guide you through the configuration process, including
|
||||||
|
specifying how many machines you will run training on and the number
|
||||||
|
of GPUs per machine.
|
||||||
|
|
||||||
|
You only need to do this once.
|
||||||
|
|
||||||
|
3. Launch training from the command line using `accelerate launch`. Be sure
|
||||||
|
that your current working directory is the InvokeAI root directory (usually
|
||||||
|
named `invokeai` in your home directory):
|
||||||
|
|
||||||
|
```sh
|
||||||
|
accelerate launch .venv/bin/invokeai-ti \
|
||||||
|
--model=stable-diffusion-1.5 \
|
||||||
|
--resolution=512 \
|
||||||
|
--learnable_property=object \
|
||||||
|
--initializer_token='*' \
|
||||||
|
--placeholder_token='<shraddha>' \
|
||||||
|
--train_data_dir=/home/lstein/invokeai/text-inversion-training-data/shraddha \
|
||||||
|
--output_dir=/home/lstein/invokeai/text-inversion-training/shraddha \
|
||||||
|
--scale_lr \
|
||||||
|
--train_batch_size=10 \
|
||||||
|
--gradient_accumulation_steps=4 \
|
||||||
|
--max_train_steps=2000 \
|
||||||
|
--learning_rate=0.0005 \
|
||||||
|
--lr_scheduler=constant \
|
||||||
|
--mixed_precision=fp16 \
|
||||||
|
--only_save_embeds
|
||||||
|
```
|
||||||
|
|
||||||
## Using Embeddings
|
## Using Embeddings
|
||||||
|
|
||||||
After training completes, the resultant embeddings will be saved into your `$INVOKEAI_ROOT/embeddings/<trigger word>/learned_embeds.bin`.
|
After training completes, the resultant embeddings will be saved into your `$INVOKEAI_ROOT/embeddings/<trigger word>/learned_embeds.bin`.
|
||||||
|
@ -17,6 +17,7 @@ from pathlib import Path
|
|||||||
from typing import List, Tuple
|
from typing import List, Tuple
|
||||||
|
|
||||||
import npyscreen
|
import npyscreen
|
||||||
|
from diffusers.utils.import_utils import is_xformers_available
|
||||||
from npyscreen import widget
|
from npyscreen import widget
|
||||||
from omegaconf import OmegaConf
|
from omegaconf import OmegaConf
|
||||||
|
|
||||||
@ -29,7 +30,7 @@ from ldm.invoke.training.textual_inversion_training import (
|
|||||||
TRAINING_DATA = "text-inversion-training-data"
|
TRAINING_DATA = "text-inversion-training-data"
|
||||||
TRAINING_DIR = "text-inversion-output"
|
TRAINING_DIR = "text-inversion-output"
|
||||||
CONF_FILE = "preferences.conf"
|
CONF_FILE = "preferences.conf"
|
||||||
|
XFORMERS_AVAILABLE = is_xformers_available()
|
||||||
|
|
||||||
class textualInversionForm(npyscreen.FormMultiPageAction):
|
class textualInversionForm(npyscreen.FormMultiPageAction):
|
||||||
resolutions = [512, 768, 1024]
|
resolutions = [512, 768, 1024]
|
||||||
@ -178,7 +179,7 @@ class textualInversionForm(npyscreen.FormMultiPageAction):
|
|||||||
out_of=10000,
|
out_of=10000,
|
||||||
step=500,
|
step=500,
|
||||||
lowest=1,
|
lowest=1,
|
||||||
value=saved_args.get("max_train_steps", 3000),
|
value=saved_args.get("max_train_steps", 2500),
|
||||||
scroll_exit=True,
|
scroll_exit=True,
|
||||||
)
|
)
|
||||||
self.train_batch_size = self.add_widget_intelligent(
|
self.train_batch_size = self.add_widget_intelligent(
|
||||||
@ -187,7 +188,7 @@ class textualInversionForm(npyscreen.FormMultiPageAction):
|
|||||||
out_of=50,
|
out_of=50,
|
||||||
step=1,
|
step=1,
|
||||||
lowest=1,
|
lowest=1,
|
||||||
value=saved_args.get("train_batch_size", 8),
|
value=saved_args.get("train_batch_size", 8 if XFORMERS_AVAILABLE else 3),
|
||||||
scroll_exit=True,
|
scroll_exit=True,
|
||||||
)
|
)
|
||||||
self.gradient_accumulation_steps = self.add_widget_intelligent(
|
self.gradient_accumulation_steps = self.add_widget_intelligent(
|
||||||
@ -225,7 +226,7 @@ class textualInversionForm(npyscreen.FormMultiPageAction):
|
|||||||
self.enable_xformers_memory_efficient_attention = self.add_widget_intelligent(
|
self.enable_xformers_memory_efficient_attention = self.add_widget_intelligent(
|
||||||
npyscreen.Checkbox,
|
npyscreen.Checkbox,
|
||||||
name="Use xformers acceleration",
|
name="Use xformers acceleration",
|
||||||
value=saved_args.get("enable_xformers_memory_efficient_attention", False),
|
value=saved_args.get("enable_xformers_memory_efficient_attention", XFORMERS_AVAILABLE),
|
||||||
scroll_exit=True,
|
scroll_exit=True,
|
||||||
)
|
)
|
||||||
self.lr_scheduler = self.add_widget_intelligent(
|
self.lr_scheduler = self.add_widget_intelligent(
|
||||||
@ -429,7 +430,6 @@ def do_front_end(args: Namespace):
|
|||||||
print("** DETAILS:")
|
print("** DETAILS:")
|
||||||
print(traceback.format_exc())
|
print(traceback.format_exc())
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
args = parse_args()
|
args = parse_args()
|
||||||
global_set_root(args.root_dir or Globals.root)
|
global_set_root(args.root_dir or Globals.root)
|
||||||
|
@ -67,7 +67,7 @@ else:
|
|||||||
"nearest": PIL.Image.NEAREST,
|
"nearest": PIL.Image.NEAREST,
|
||||||
}
|
}
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
|
# Bug fix: call the function. Without the parentheses this binds the function
# object itself, which is always truthy — so the availability flag would report
# xformers as present even when it is not installed. The companion frontend
# module in this same commit calls is_xformers_available() correctly.
XFORMERS_AVAILABLE = is_xformers_available()
|
||||||
|
|
||||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||||
check_min_version("0.10.0.dev0")
|
check_min_version("0.10.0.dev0")
|
||||||
@ -227,7 +227,7 @@ def parse_args():
|
|||||||
training_group.add_argument(
|
training_group.add_argument(
|
||||||
"--train_batch_size",
|
"--train_batch_size",
|
||||||
type=int,
|
type=int,
|
||||||
default=16,
|
default=8 if XFORMERS_AVAILABLE else 3,
|
||||||
help="Batch size (per device) for the training dataloader.",
|
help="Batch size (per device) for the training dataloader.",
|
||||||
)
|
)
|
||||||
training_group.add_argument("--num_train_epochs", type=int, default=100)
|
training_group.add_argument("--num_train_epochs", type=int, default=100)
|
||||||
@ -324,6 +324,7 @@ def parse_args():
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--enable_xformers_memory_efficient_attention",
|
"--enable_xformers_memory_efficient_attention",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
|
default=XFORMERS_AVAILABLE,
|
||||||
help="Whether or not to use xformers.",
|
help="Whether or not to use xformers.",
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -536,7 +537,7 @@ def do_textual_inversion_training(
|
|||||||
seed: int = None,
|
seed: int = None,
|
||||||
resolution: int = 512,
|
resolution: int = 512,
|
||||||
center_crop: bool = False,
|
center_crop: bool = False,
|
||||||
train_batch_size: int = 16,
|
train_batch_size: int = 4,
|
||||||
num_train_epochs: int = 100,
|
num_train_epochs: int = 100,
|
||||||
max_train_steps: int = 5000,
|
max_train_steps: int = 5000,
|
||||||
gradient_accumulation_steps: int = 1,
|
gradient_accumulation_steps: int = 1,
|
||||||
|
Reference in New Issue
Block a user