diff --git a/docs/features/TEXTUAL_INVERSION.md b/docs/features/TEXTUAL_INVERSION.md
index c0b34e3f7c..ebb09d6ee2 100644
--- a/docs/features/TEXTUAL_INVERSION.md
+++ b/docs/features/TEXTUAL_INVERSION.md
@@ -154,8 +154,11 @@ training sets will converge with 2000-3000 steps.
 
 This adjusts how many training images are processed simultaneously in
 each step. Higher values will cause the training process to run more
-quickly, but use more memory. The default size will run with GPUs with
-as little as 12 GB.
+quickly, but use more memory. The default size is selected based on
+whether you have the `xformers` memory-efficient attention library
+installed. If `xformers` is available, the batch size will be 8,
+otherwise 3. These values were chosen to allow training to run on
+GPUs with as little as 12 GB of VRAM.
 
 ### Learning rate
 
@@ -172,8 +175,10 @@ learning rate to improve performance.
 
 ### Use xformers acceleration
 
-This will activate XFormers memory-efficient attention. You need to
-have XFormers installed for this to have an effect.
+This will activate XFormers memory-efficient attention, which will
+reduce memory requirements by half or more and allow you to select a
+higher batch size. You need to have XFormers installed for this to
+have an effect.
 
 ### Learning rate scheduler
 
@@ -250,6 +255,49 @@ invokeai-ti \
 --only_save_embeds
 ```
 
+## Using Distributed Training
+
+If you have multiple GPUs on one machine, or a cluster of GPU-enabled
+machines, you can activate distributed training. See the [HuggingFace
+Accelerate pages](https://huggingface.co/docs/accelerate/index) for
+full information, but the basic recipe is:
+
+1. Enter the InvokeAI developer's console command line by selecting
+option [8] from the `invoke.sh`/`invoke.bat` script.
+
+2. Configure Accelerate using `accelerate config`:
+```sh
+accelerate config
+```
+This will guide you through the configuration process, including
+specifying how many machines you will run training on and the number
+of GPUs per machine.
+
+You only need to do this once.
+
+3. Launch training from the command line using `accelerate launch`. Be sure
+that your current working directory is the InvokeAI root directory (usually
+named `invokeai` in your home directory):
+
+```sh
+accelerate launch .venv/bin/invokeai-ti \
+--model=stable-diffusion-1.5 \
+--resolution=512 \
+--learnable_property=object \
+--initializer_token='*' \
+--placeholder_token='' \
+--train_data_dir=/home/lstein/invokeai/text-inversion-training-data/shraddha \
+--output_dir=/home/lstein/invokeai/text-inversion-training/shraddha \
+--scale_lr \
+--train_batch_size=10 \
+--gradient_accumulation_steps=4 \
+--max_train_steps=2000 \
+--learning_rate=0.0005 \
+--lr_scheduler=constant \
+--mixed_precision=fp16 \
+--only_save_embeds
+```
+
 ## Using Embeddings
 
 After training completes, the resultant embeddings will be saved into your
 `$INVOKEAI_ROOT/embeddings//learned_embeds.bin`.
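The batch-size behavior documented above comes from a runtime check for `xformers`. As a minimal sketch of that logic (assuming the `diffusers` library is installed; the variable names here are illustrative — the actual wiring appears in the `textual_inversion.py` changes below):

```python
# Minimal sketch of the documented batch-size default (illustrative names).
from diffusers.utils.import_utils import is_xformers_available

xformers_ok = is_xformers_available()         # must be *called*; the bare function object is always truthy
default_batch_size = 8 if xformers_ok else 3  # both values fit a 12 GB GPU per the docs above
print(f"xformers available: {xformers_ok}; default batch size: {default_batch_size}")
```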
diff --git a/ldm/invoke/_version.py b/ldm/invoke/_version.py
index de365d650b..c2881e4f9e 100644
--- a/ldm/invoke/_version.py
+++ b/ldm/invoke/_version.py
@@ -1,2 +1,2 @@
-__version__='2.3.2.post1'
+__version__='2.3.3-rc1'
 
diff --git a/ldm/invoke/dynamic_prompts.py b/ldm/invoke/dynamic_prompts.py
index c196ce7c33..de02c55b56 100755
--- a/ldm/invoke/dynamic_prompts.py
+++ b/ldm/invoke/dynamic_prompts.py
@@ -157,7 +157,7 @@ def _run_invoke(
 ):
     pid = os.getpid()
     logdir.mkdir(parents=True, exist_ok=True)
-    logfile = Path(logdir, f'{time.strftime("%Y-%m-%d-%H:%M:%S")}-pid={pid}.txt')
+    logfile = Path(logdir, f'{time.strftime("%Y-%m-%d_%H-%M-%S")}-pid={pid}.txt')
     print(
         f">> Process {pid} running on GPU {gpu}; logging to {logfile}", file=sys.stderr
     )
diff --git a/ldm/invoke/pngwriter.py b/ldm/invoke/pngwriter.py
index da5af82aa8..cdd9107f20 100644
--- a/ldm/invoke/pngwriter.py
+++ b/ldm/invoke/pngwriter.py
@@ -30,14 +30,17 @@ class PngWriter:
             prefix = self._unused_prefix()
         else:
             with open(next_prefix_file,'r') as file:
-                prefix=int(file.readline() or int(self._unused_prefix())-1)
-                prefix+=1
+                prefix = 0
+                try:
+                    prefix=int(file.readline())
+                except (TypeError, ValueError):
+                    prefix=self._unused_prefix()
             with open(next_prefix_file,'w') as file:
-                file.write(str(prefix))
+                file.write(str(prefix+1))
         return f'{prefix:06}'
 
     # gives the next unique prefix in outdir
-    def _unused_prefix(self):
+    def _unused_prefix(self)->int:
         # sort reverse alphabetically until we find max+1
         dirlist = sorted(os.listdir(self.outdir), reverse=True)
         # find the first filename that matches our pattern or return 000000.0.png
@@ -45,8 +48,7 @@ class PngWriter:
             (f for f in dirlist if re.match('^(\d+)\..*\.png', f)),
             '0000000.0.png',
         )
-        basecount = int(existing_name.split('.', 1)[0]) + 1
-        return f'{basecount:06}'
+        return int(existing_name.split('.', 1)[0]) + 1
 
     # saves image named _image_ to outdir/name, writing metadata from prompt
     # returns full path of output
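The `pngwriter.py` hunk above replaces a fragile one-liner with an explicit try/except fallback. A simplified, standalone sketch of the same counter-file scheme (hypothetical file and function names, not the actual `PngWriter` API):

```python
# Standalone sketch of the next-prefix scheme (hypothetical names).
import os
import re
from pathlib import Path

def unused_prefix(outdir: str) -> int:
    """Scan existing 'NNNNNN.*.png' files and return one past the highest prefix."""
    dirlist = sorted(os.listdir(outdir), reverse=True)
    existing = next(
        (f for f in dirlist if re.match(r"^(\d+)\..*\.png", f)),
        "0000000.0.png",  # fallback when no matching file exists
    )
    return int(existing.split(".", 1)[0]) + 1

def next_prefix(outdir: str) -> str:
    """Read the persisted counter; recover by scanning on corruption; persist counter+1."""
    counter_file = Path(outdir, ".next_prefix")
    try:
        prefix = int(counter_file.read_text())
    except (FileNotFoundError, ValueError):
        prefix = unused_prefix(outdir)
    counter_file.write_text(str(prefix + 1))
    return f"{prefix:06}"
```

With this structure an empty or corrupt counter file no longer raises, and the counter is only written back after a valid value has been determined.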
diff --git a/ldm/invoke/training/textual_inversion.py b/ldm/invoke/training/textual_inversion.py
index 2961e4d99c..f1e8e2d679 100755
--- a/ldm/invoke/training/textual_inversion.py
+++ b/ldm/invoke/training/textual_inversion.py
@@ -17,6 +17,7 @@ from pathlib import Path
 from typing import List, Tuple
 
 import npyscreen
+from diffusers.utils.import_utils import is_xformers_available
 from npyscreen import widget
 from omegaconf import OmegaConf
 
@@ -29,7 +30,7 @@ from ldm.invoke.training.textual_inversion_training import (
 TRAINING_DATA = "text-inversion-training-data"
 TRAINING_DIR = "text-inversion-output"
 CONF_FILE = "preferences.conf"
-
+XFORMERS_AVAILABLE = is_xformers_available()
 
 class textualInversionForm(npyscreen.FormMultiPageAction):
     resolutions = [512, 768, 1024]
@@ -178,7 +179,7 @@ class textualInversionForm(npyscreen.FormMultiPageAction):
             out_of=10000,
             step=500,
             lowest=1,
-            value=saved_args.get("max_train_steps", 3000),
+            value=saved_args.get("max_train_steps", 2500),
             scroll_exit=True,
         )
         self.train_batch_size = self.add_widget_intelligent(
@@ -187,7 +188,7 @@ class textualInversionForm(npyscreen.FormMultiPageAction):
             out_of=50,
             step=1,
             lowest=1,
-            value=saved_args.get("train_batch_size", 8),
+            value=saved_args.get("train_batch_size", 8 if XFORMERS_AVAILABLE else 3),
             scroll_exit=True,
         )
         self.gradient_accumulation_steps = self.add_widget_intelligent(
@@ -225,7 +226,7 @@ class textualInversionForm(npyscreen.FormMultiPageAction):
         self.enable_xformers_memory_efficient_attention = self.add_widget_intelligent(
             npyscreen.Checkbox,
             name="Use xformers acceleration",
-            value=saved_args.get("enable_xformers_memory_efficient_attention", False),
+            value=saved_args.get("enable_xformers_memory_efficient_attention", XFORMERS_AVAILABLE),
             scroll_exit=True,
         )
         self.lr_scheduler = self.add_widget_intelligent(
@@ -428,8 +429,7 @@ def do_front_end(args: Namespace):
         print(str(e))
         print("** DETAILS:")
         print(traceback.format_exc())
-
-
+
 def main():
     args = parse_args()
     global_set_root(args.root_dir or Globals.root)
diff --git a/ldm/invoke/training/textual_inversion_training.py b/ldm/invoke/training/textual_inversion_training.py
index 7794712bc1..efc0986d6c 100644
--- a/ldm/invoke/training/textual_inversion_training.py
+++ b/ldm/invoke/training/textual_inversion_training.py
@@ -67,7 +67,7 @@ else:
     "nearest": PIL.Image.NEAREST,
 }
 # ------------------------------------------------------------------------------
-
+XFORMERS_AVAILABLE = is_xformers_available()
 
 # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
 check_min_version("0.10.0.dev0")
@@ -227,7 +227,7 @@ def parse_args():
     training_group.add_argument(
         "--train_batch_size",
         type=int,
-        default=16,
+        default=8 if XFORMERS_AVAILABLE else 3,
         help="Batch size (per device) for the training dataloader.",
     )
     training_group.add_argument("--num_train_epochs", type=int, default=100)
@@ -324,6 +324,7 @@ def parse_args():
     parser.add_argument(
         "--enable_xformers_memory_efficient_attention",
         action="store_true",
+        default=XFORMERS_AVAILABLE,
         help="Whether or not to use xformers.",
     )
 
@@ -536,7 +537,7 @@ def do_textual_inversion_training(
     seed: int = None,
     resolution: int = 512,
     center_crop: bool = False,
-    train_batch_size: int = 16,
+    train_batch_size: int = 4,
     num_train_epochs: int = 100,
     max_train_steps: int = 5000,
     gradient_accumulation_steps: int = 1,
diff --git a/pyproject.toml b/pyproject.toml
index d5c8c03163..5394d31354 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -70,7 +70,7 @@ dependencies = [
   "taming-transformers-rom1504",
   "test-tube>=0.7.5",
   "torch-fidelity",
-  "torch>=1.13.1",
+  "torch~=1.13.1",
   "torchmetrics",
   "torchvision>=0.14.1",
   "transformers~=4.26",
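Finally, the `pyproject.toml` change swaps `>=` for PEP 440's compatible-release operator, so pip may install torch 1.13.x bugfix releases but not 1.14 or 2.x. A quick way to verify what a specifier admits, using the `packaging` library (assumed to be installed; it is a common transitive dependency of Python tooling):

```python
# Sketch: what the "~=1.13.1" compatible-release pin admits.
from packaging.specifiers import SpecifierSet

compat = SpecifierSet("~=1.13.1")  # equivalent to: >=1.13.1, ==1.13.*
print("1.13.1" in compat)  # True  -- the pinned version itself
print("1.13.9" in compat)  # True  -- future bugfix releases accepted
print("1.14.0" in compat)  # False -- next minor release excluded
print("2.0.0" in compat)   # False -- major upgrades excluded
```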