Merge branch 'v2.3' into security/scan-ckpt-models

blessedcoolant 2023-03-24 22:11:34 +13:00 committed by GitHub
commit abaa91195d
7 changed files with 73 additions and 22 deletions

View File

@@ -154,8 +154,11 @@ training sets will converge with 2000-3000 steps.
 This adjusts how many training images are processed simultaneously in
 each step. Higher values will cause the training process to run more
-quickly, but use more memory. The default size will run with GPUs with
-as little as 12 GB.
+quickly, but use more memory. The default size is selected based on
+whether you have the `xformers` memory-efficient attention library
+installed. If `xformers` is available, the batch size will be 8,
+otherwise 3. These values were chosen to allow training to run with
+GPUs with as little as 12 GB VRAM.
 
 ### Learning rate
@@ -172,8 +175,10 @@ learning rate to improve performance.
 ### Use xformers acceleration
 
-This will activate XFormers memory-efficient attention. You need to
-have XFormers installed for this to have an effect.
+This will activate XFormers memory-efficient attention, which will
+reduce memory requirements by half or more and allow you to select a
+higher batch size. You need to have XFormers installed for this to
+have an effect.
 
 ### Learning rate scheduler
@@ -250,6 +255,49 @@ invokeai-ti \
          --only_save_embeds
 ```
 
+## Using Distributed Training
+
+If you have multiple GPUs on one machine, or a cluster of GPU-enabled
+machines, you can activate distributed training. See the [HuggingFace
+Accelerate pages](https://huggingface.co/docs/accelerate/index) for
+full information, but the basic recipe is:
+
+1. Enter the InvokeAI developer's console command line by selecting
+option [8] from the `invoke.sh`/`invoke.bat` script.
+
+2. Configure Accelerate using `accelerate config`:
+
+```sh
+accelerate config
+```
+
+This will guide you through the configuration process, including
+specifying how many machines you will run training on and the number
+of GPUs per machine.
+
+You only need to do this once.
+
+3. Launch training from the command line using `accelerate launch`. Be sure
+that your current working directory is the InvokeAI root directory (usually
+named `invokeai` in your home directory):
+
+```sh
+accelerate launch .venv/bin/invokeai-ti \
+          --model=stable-diffusion-1.5 \
+          --resolution=512 \
+          --learnable_property=object \
+          --initializer_token='*' \
+          --placeholder_token='<shraddha>' \
+          --train_data_dir=/home/lstein/invokeai/text-inversion-training-data/shraddha \
+          --output_dir=/home/lstein/invokeai/text-inversion-training/shraddha \
+          --scale_lr \
+          --train_batch_size=10 \
+          --gradient_accumulation_steps=4 \
+          --max_train_steps=2000 \
+          --learning_rate=0.0005 \
+          --lr_scheduler=constant \
+          --mixed_precision=fp16 \
+          --only_save_embeds
+```
+
 ## Using Embeddings
 
 After training completes, the resultant embeddings will be saved into your `$INVOKEAI_ROOT/embeddings/<trigger word>/learned_embeds.bin`.
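
A note on the distributed example above: the batch each optimizer update sees is the per-device batch times the gradient accumulation steps times the number of Accelerate processes. A quick sanity check in Python (the two-GPU process count is an assumption for illustration, not part of the commit):

```python
# Effective batch size for the `accelerate launch` example above.
train_batch_size = 10            # per device, from --train_batch_size
gradient_accumulation_steps = 4  # from --gradient_accumulation_steps
num_processes = 2                # assumed: one Accelerate process per GPU, 2 GPUs

effective_batch = train_batch_size * gradient_accumulation_steps * num_processes
print(effective_batch)  # 80 images contribute to each optimizer update
```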

View File

@@ -1,2 +1,2 @@
-__version__='2.3.2.post1'
+__version__='2.3.3-rc1'
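
A side note on the new version string: PEP 440 treats `2.3.3-rc1` as a pre-release and normalizes it to `2.3.3rc1`, so packaging tools still order it before the final 2.3.3 release:

```python
from packaging.version import Version

print(Version("2.3.3-rc1") == Version("2.3.3rc1"))  # True: dash form normalizes
print(Version("2.3.3-rc1") < Version("2.3.3"))      # True: rc sorts before final
```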

View File

@@ -157,7 +157,7 @@ def _run_invoke(
 ):
     pid = os.getpid()
     logdir.mkdir(parents=True, exist_ok=True)
-    logfile = Path(logdir, f'{time.strftime("%Y-%m-%d-%H:%M:%S")}-pid={pid}.txt')
+    logfile = Path(logdir, f'{time.strftime("%Y-%m-%d_%H-%M-%S")}-pid={pid}.txt')
     print(
         f">> Process {pid} running on GPU {gpu}; logging to {logfile}", file=sys.stderr
     )
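
The only change in this hunk is the timestamp format in the log filename. Colons are not valid in Windows filenames (and need quoting in most shells), so the new pattern uses only hyphens and an underscore. A minimal before/after sketch:

```python
import time

# Old pattern: the colons make the name unusable on Windows filesystems.
old_name = time.strftime("%Y-%m-%d-%H:%M:%S")  # e.g. 2023-03-24-22:11:34
# New pattern: hyphens and an underscore only, safe on every platform.
new_name = time.strftime("%Y-%m-%d_%H-%M-%S")  # e.g. 2023-03-24_22-11-34
print(old_name, new_name)
```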

View File

@@ -30,14 +30,17 @@ class PngWriter:
             prefix = self._unused_prefix()
         else:
             with open(next_prefix_file,'r') as file:
-                prefix=int(file.readline() or int(self._unused_prefix())-1)
-                prefix+=1
+                prefix = 0
+                try:
+                    prefix=int(file.readline())
+                except (TypeError, ValueError):
+                    prefix=self._unused_prefix()
         with open(next_prefix_file,'w') as file:
-            file.write(str(prefix))
+            file.write(str(prefix+1))
         return f'{prefix:06}'
 
     # gives the next unique prefix in outdir
-    def _unused_prefix(self):
+    def _unused_prefix(self)->int:
         # sort reverse alphabetically until we find max+1
         dirlist = sorted(os.listdir(self.outdir), reverse=True)
         # find the first filename that matches our pattern or return 000000.0.png
@@ -45,8 +48,7 @@ class PngWriter:
             (f for f in dirlist if re.match('^(\d+)\..*\.png', f)),
             '0000000.0.png',
         )
-        basecount = int(existing_name.split('.', 1)[0]) + 1
-        return f'{basecount:06}'
+        return int(existing_name.split('.', 1)[0]) + 1
 
     # saves image named _image_ to outdir/name, writing metadata from prompt
     # returns full path of output
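
The old one-liner raised an uncaught `ValueError` whenever `.next_prefix` contained anything `int()` could not parse (e.g. a stray blank line), losing the counter. The new code catches the failure and falls back to rescanning the output directory. A standalone sketch of the fixed logic, with hypothetical names (`counter_file` and `rescan_value` stand in for the class's file and `self._unused_prefix()`):

```python
from pathlib import Path

def next_prefix(counter_file: Path, rescan_value: int) -> str:
    """Sketch of the fixed counter logic; assumes counter_file exists."""
    prefix = 0
    try:
        prefix = int(counter_file.read_text().split("\n")[0])  # ValueError on garbage
    except (TypeError, ValueError):
        prefix = rescan_value  # stand-in for self._unused_prefix()
    counter_file.write_text(str(prefix + 1))  # persist the *next* prefix
    return f"{prefix:06}"
```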

View File

@@ -17,6 +17,7 @@ from pathlib import Path
 from typing import List, Tuple
 
 import npyscreen
+from diffusers.utils.import_utils import is_xformers_available
 from npyscreen import widget
 from omegaconf import OmegaConf
@@ -29,7 +30,7 @@ from ldm.invoke.training.textual_inversion_training import (
 TRAINING_DATA = "text-inversion-training-data"
 TRAINING_DIR = "text-inversion-output"
 CONF_FILE = "preferences.conf"
+XFORMERS_AVAILABLE = is_xformers_available()
 
 class textualInversionForm(npyscreen.FormMultiPageAction):
     resolutions = [512, 768, 1024]
@@ -178,7 +179,7 @@ class textualInversionForm(npyscreen.FormMultiPageAction):
             out_of=10000,
             step=500,
             lowest=1,
-            value=saved_args.get("max_train_steps", 3000),
+            value=saved_args.get("max_train_steps", 2500),
             scroll_exit=True,
         )
         self.train_batch_size = self.add_widget_intelligent(
@@ -187,7 +188,7 @@ class textualInversionForm(npyscreen.FormMultiPageAction):
             out_of=50,
             step=1,
             lowest=1,
-            value=saved_args.get("train_batch_size", 8),
+            value=saved_args.get("train_batch_size", 8 if XFORMERS_AVAILABLE else 3),
             scroll_exit=True,
         )
         self.gradient_accumulation_steps = self.add_widget_intelligent(
@@ -225,7 +226,7 @@ class textualInversionForm(npyscreen.FormMultiPageAction):
         self.enable_xformers_memory_efficient_attention = self.add_widget_intelligent(
             npyscreen.Checkbox,
             name="Use xformers acceleration",
-            value=saved_args.get("enable_xformers_memory_efficient_attention", False),
+            value=saved_args.get("enable_xformers_memory_efficient_attention", XFORMERS_AVAILABLE),
             scroll_exit=True,
         )
         self.lr_scheduler = self.add_widget_intelligent(
@@ -428,8 +429,7 @@ def do_front_end(args: Namespace):
         print(str(e))
         print("** DETAILS:")
         print(traceback.format_exc())
-
 
 def main():
     args = parse_args()
     global_set_root(args.root_dir or Globals.root)
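
The pattern running through these hunks: a value saved in `preferences.conf` wins, and the hard-coded fallbacks are replaced by ones computed from whether xformers is importable. A minimal sketch of that pattern (the empty `saved_args` dict is a stand-in for the parsed preferences file):

```python
from diffusers.utils.import_utils import is_xformers_available

XFORMERS_AVAILABLE = is_xformers_available()
saved_args = {}  # stand-in: nothing saved yet, so every .get() uses its default

# xformers attention needs far less memory, so a larger default batch fits in 12 GB.
batch = saved_args.get("train_batch_size", 8 if XFORMERS_AVAILABLE else 3)
use_xformers = saved_args.get("enable_xformers_memory_efficient_attention", XFORMERS_AVAILABLE)
```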

View File

@@ -67,7 +67,7 @@ else:
     "nearest": PIL.Image.NEAREST,
 }
 
 # ------------------------------------------------------------------------------
+XFORMERS_AVAILABLE = is_xformers_available()
 
 # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
 check_min_version("0.10.0.dev0")
@@ -227,7 +227,7 @@ def parse_args():
     training_group.add_argument(
         "--train_batch_size",
         type=int,
-        default=16,
+        default=8 if XFORMERS_AVAILABLE else 3,
         help="Batch size (per device) for the training dataloader.",
     )
     training_group.add_argument("--num_train_epochs", type=int, default=100)
@@ -324,6 +324,7 @@ def parse_args():
     parser.add_argument(
         "--enable_xformers_memory_efficient_attention",
         action="store_true",
+        default=XFORMERS_AVAILABLE,
         help="Whether or not to use xformers.",
     )
@@ -536,7 +537,7 @@ def do_textual_inversion_training(
     seed: int = None,
     resolution: int = 512,
     center_crop: bool = False,
-    train_batch_size: int = 16,
+    train_batch_size: int = 4,
     num_train_epochs: int = 100,
     max_train_steps: int = 5000,
     gradient_accumulation_steps: int = 1,
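
One subtlety in the `--enable_xformers_memory_efficient_attention` hunk above: with `action="store_true"`, a `default` of `True` means the command line can turn the option on but never off. A small demonstration (the hard-coded `True` stands in for the runtime xformers check):

```python
import argparse

XFORMERS_AVAILABLE = True  # stand-in for is_xformers_available()

parser = argparse.ArgumentParser()
parser.add_argument(
    "--enable_xformers_memory_efficient_attention",
    action="store_true",
    default=XFORMERS_AVAILABLE,  # if True, no flag exists to switch it off
)
print(parser.parse_args([]))  # enabled even when the flag is omitted
```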

View File

@@ -70,7 +70,7 @@ dependencies = [
     "taming-transformers-rom1504",
     "test-tube>=0.7.5",
     "torch-fidelity",
-    "torch>=1.13.1",
+    "torch~=1.13.1",
     "torchmetrics",
     "torchvision>=0.14.1",
     "transformers~=4.26",