Merge branch 'v2.3' into security/scan-ckpt-models

2024-08-30 20:32:17 +00:00 · 2023-03-24 22:11:34 +13:00 · 2023-03-24 22:11:34 +13:00 · abaa91195d
commit abaa91195d
parent 4a3951681c 1806bfb755
7 changed files with 73 additions and 22 deletions
--- a/docs/features/TEXTUAL_INVERSION.md
+++ b/docs/features/TEXTUAL_INVERSION.md
@ -154,8 +154,11 @@ training sets will converge with 2000-3000 steps.

 This adjusts how many training images are processed simultaneously in
 each step. Higher values will cause the training process to run more
-quickly, but use more memory. The default size will run with GPUs with
-as little as 12 GB.
+quickly, but use more memory. The default size is selected based on
+whether you have the `xformers` memory-efficient attention library
+installed. If `xformers` is available, the batch size will be 8,
+otherwise 3. These values were chosen to allow training to run on
+GPUs with as little as 12 GB of VRAM.

 ### Learning rate

@ -172,8 +175,10 @@ learning rate to improve performance.

 ### Use xformers acceleration

-This will activate XFormers memory-efficient attention. You need to
-have XFormers installed for this to have an effect.
+This will activate XFormers memory-efficient attention, which will
+reduce memory requirements by half or more and allow you to select a
+higher batch size. You need to have XFormers installed for this to
+have an effect.

 ### Learning rate scheduler

@ -250,6 +255,49 @@ invokeai-ti \
       --only_save_embeds
 ```

+## Using Distributed Training
+
+If you have multiple GPUs on one machine, or a cluster of GPU-enabled
+machines, you can activate distributed training. See the [HuggingFace
+Accelerate pages](https://huggingface.co/docs/accelerate/index) for
+full information, but the basic recipe is:
+
+1. Enter the InvokeAI developer's console command line by selecting
+option [8] from the `invoke.sh`/`invoke.bat` script.
+
+2. Configure Accelerate using `accelerate config`:
+```sh
+accelerate config
+```
+This will guide you through the configuration process, including
+specifying how many machines you will run training on and the number
+of GPUs per machine.
+
+You only need to do this once.
+
+3. Launch training from the command line using `accelerate launch`. Be sure
+that your current working directory is the InvokeAI root directory (usually
+named `invokeai` in your home directory):
+
+```sh
+accelerate launch .venv/bin/invokeai-ti \
+       --model=stable-diffusion-1.5 \
+       --resolution=512 \
+       --learnable_property=object \
+       --initializer_token='*' \
+       --placeholder_token='<shraddha>' \
+       --train_data_dir=/home/lstein/invokeai/text-inversion-training-data/shraddha \
+       --output_dir=/home/lstein/invokeai/text-inversion-training/shraddha \
+       --scale_lr \
+       --train_batch_size=10 \
+       --gradient_accumulation_steps=4 \
+       --max_train_steps=2000 \
+       --learning_rate=0.0005 \
+       --lr_scheduler=constant \
+       --mixed_precision=fp16 \
+       --only_save_embeds
+```
+
 ## Using Embeddings

 After training completes, the resultant embeddings will be saved into your `$INVOKEAI_ROOT/embeddings/<trigger word>/learned_embeds.bin`.
--- a/ldm/invoke/_version.py
+++ b/ldm/invoke/_version.py
@ -1,2 +1,2 @@

-__version__='2.3.2.post1'
+__version__='2.3.3-rc1'
--- a/ldm/invoke/dynamic_prompts.py
+++ b/ldm/invoke/dynamic_prompts.py
@ -157,7 +157,7 @@ def _run_invoke(
 ):
    pid = os.getpid()
    logdir.mkdir(parents=True, exist_ok=True)
-    logfile = Path(logdir, f'{time.strftime("%Y-%m-%d-%H:%M:%S")}-pid={pid}.txt')
+    logfile = Path(logdir, f'{time.strftime("%Y-%m-%d_%H-%M-%S")}-pid={pid}.txt')
    print(
        f">> Process {pid} running on GPU {gpu}; logging to {logfile}", file=sys.stderr
    )
--- a/ldm/invoke/pngwriter.py
+++ b/ldm/invoke/pngwriter.py
@ -30,14 +30,17 @@ class PngWriter:
                prefix = self._unused_prefix()
            else:
                with open(next_prefix_file,'r') as file:
-                    prefix=int(file.readline() or int(self._unused_prefix())-1)
-                    prefix+=1
+                    prefix = 0
+                    try:
+                        prefix=int(file.readline())
+                    except (TypeError, ValueError):
+                        prefix=self._unused_prefix()
            with open(next_prefix_file,'w') as file:
-                file.write(str(prefix))
+                file.write(str(prefix+1))
        return f'{prefix:06}'

    # gives the next unique prefix in outdir
-    def _unused_prefix(self):
+    def _unused_prefix(self)->int:
        # sort reverse alphabetically until we find max+1
        dirlist = sorted(os.listdir(self.outdir), reverse=True)
        # find the first filename that matches our pattern or return 000000.0.png
@ -45,8 +48,7 @@ class PngWriter:
            (f for f in dirlist if re.match('^(\d+)\..*\.png', f)),
            '0000000.0.png',
        )
-        basecount = int(existing_name.split('.', 1)[0]) + 1
-        return f'{basecount:06}'
+        return int(existing_name.split('.', 1)[0]) + 1

    # saves image named _image_ to outdir/name, writing metadata from prompt
    # returns full path of output
--- a/ldm/invoke/training/textual_inversion.py
+++ b/ldm/invoke/training/textual_inversion.py
@ -17,6 +17,7 @@ from pathlib import Path
 from typing import List, Tuple

 import npyscreen
+from diffusers.utils.import_utils import is_xformers_available
 from npyscreen import widget
 from omegaconf import OmegaConf

@ -29,7 +30,7 @@ from ldm.invoke.training.textual_inversion_training import (
 TRAINING_DATA = "text-inversion-training-data"
 TRAINING_DIR = "text-inversion-output"
 CONF_FILE = "preferences.conf"
-
+XFORMERS_AVAILABLE = is_xformers_available()

 class textualInversionForm(npyscreen.FormMultiPageAction):
    resolutions = [512, 768, 1024]
@ -178,7 +179,7 @@ class textualInversionForm(npyscreen.FormMultiPageAction):
            out_of=10000,
            step=500,
            lowest=1,
-            value=saved_args.get("max_train_steps", 3000),
+            value=saved_args.get("max_train_steps", 2500),
            scroll_exit=True,
        )
        self.train_batch_size = self.add_widget_intelligent(
@ -187,7 +188,7 @@ class textualInversionForm(npyscreen.FormMultiPageAction):
            out_of=50,
            step=1,
            lowest=1,
-            value=saved_args.get("train_batch_size", 8),
+            value=saved_args.get("train_batch_size", 8 if XFORMERS_AVAILABLE else 3),
            scroll_exit=True,
        )
        self.gradient_accumulation_steps = self.add_widget_intelligent(
@ -225,7 +226,7 @@ class textualInversionForm(npyscreen.FormMultiPageAction):
        self.enable_xformers_memory_efficient_attention = self.add_widget_intelligent(
            npyscreen.Checkbox,
            name="Use xformers acceleration",
-            value=saved_args.get("enable_xformers_memory_efficient_attention", False),
+            value=saved_args.get("enable_xformers_memory_efficient_attention", XFORMERS_AVAILABLE),
            scroll_exit=True,
        )
        self.lr_scheduler = self.add_widget_intelligent(
@ -428,8 +429,7 @@ def do_front_end(args: Namespace):
            print(str(e))
            print("** DETAILS:")
            print(traceback.format_exc())
-
-
+            
 def main():
    args = parse_args()
    global_set_root(args.root_dir or Globals.root)
--- a/ldm/invoke/training/textual_inversion_training.py
+++ b/ldm/invoke/training/textual_inversion_training.py
@ -67,7 +67,7 @@ else:
        "nearest": PIL.Image.NEAREST,
    }
 # ------------------------------------------------------------------------------
-
+XFORMERS_AVAILABLE = is_xformers_available()  # must be called: the bare function object is always truthy

 # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
 check_min_version("0.10.0.dev0")
@ -227,7 +227,7 @@ def parse_args():
    training_group.add_argument(
        "--train_batch_size",
        type=int,
-        default=16,
+        default=8 if XFORMERS_AVAILABLE else 3,
        help="Batch size (per device) for the training dataloader.",
    )
    training_group.add_argument("--num_train_epochs", type=int, default=100)
@ -324,6 +324,7 @@ def parse_args():
    parser.add_argument(
        "--enable_xformers_memory_efficient_attention",
        action="store_true",
+        default=XFORMERS_AVAILABLE,
        help="Whether or not to use xformers.",
    )

@ -536,7 +537,7 @@ def do_textual_inversion_training(
    seed: int = None,
    resolution: int = 512,
    center_crop: bool = False,
-    train_batch_size: int = 16,
+    train_batch_size: int = 4,
    num_train_epochs: int = 100,
    max_train_steps: int = 5000,
    gradient_accumulation_steps: int = 1,
--- a/pyproject.toml
+++ b/pyproject.toml
@ -70,7 +70,7 @@ dependencies = [
  "taming-transformers-rom1504",
  "test-tube>=0.7.5",
  "torch-fidelity",
-  "torch>=1.13.1",
+  "torch~=1.13.1",
  "torchmetrics",
  "torchvision>=0.14.1",
  "transformers~=4.26",