mirror of
https://github.com/invoke-ai/InvokeAI
synced 2024-08-30 20:32:17 +00:00
Merge branch 'v2.3' into security/scan-ckpt-models
This commit is contained in:
commit
abaa91195d
@ -154,8 +154,11 @@ training sets will converge with 2000-3000 steps.
|
||||
|
||||
This adjusts how many training images are processed simultaneously in
|
||||
each step. Higher values will cause the training process to run more
|
||||
quickly, but use more memory. The default size will run with GPUs with
|
||||
as little as 12 GB.
|
||||
quickly, but use more memory. The default size is selected based on
|
||||
whether you have the `xformers` memory-efficient attention library
|
||||
installed. If `xformers` is available, the batch size will be 8,
|
||||
otherwise 3. These values were chosen to allow training to run with
|
||||
GPUs with as little as 12 GB VRAM.
|
||||
|
||||
### Learning rate
|
||||
|
||||
@ -172,8 +175,10 @@ learning rate to improve performance.
|
||||
|
||||
### Use xformers acceleration
|
||||
|
||||
This will activate XFormers memory-efficient attention. You need to
|
||||
have XFormers installed for this to have an effect.
|
||||
This will activate XFormers memory-efficient attention, which will
|
||||
reduce memory requirements by half or more and allow you to select a
|
||||
higher batch size. You need to have XFormers installed for this to
|
||||
have an effect.
|
||||
|
||||
### Learning rate scheduler
|
||||
|
||||
@ -250,6 +255,49 @@ invokeai-ti \
|
||||
--only_save_embeds
|
||||
```
|
||||
|
||||
## Using Distributed Training
|
||||
|
||||
If you have multiple GPUs on one machine, or a cluster of GPU-enabled
|
||||
machines, you can activate distributed training. See the [HuggingFace
|
||||
Accelerate pages](https://huggingface.co/docs/accelerate/index) for
|
||||
full information, but the basic recipe is:
|
||||
|
||||
1. Enter the InvokeAI developer's console command line by selecting
|
||||
option [8] from the `invoke.sh`/`invoke.bat` script.
|
||||
|
||||
2. Configurate Accelerate using `accelerate config`:
|
||||
```sh
|
||||
accelerate config
|
||||
```
|
||||
This will guide you through the configuration process, including
|
||||
specifying how many machines you will run training on and the number
|
||||
of GPUs pe rmachine.
|
||||
|
||||
You only need to do this once.
|
||||
|
||||
3. Launch training from the command line using `accelerate launch`. Be sure
|
||||
that your current working directory is the InvokeAI root directory (usually
|
||||
named `invokeai` in your home directory):
|
||||
|
||||
```sh
|
||||
accelerate launch .venv/bin/invokeai-ti \
|
||||
--model=stable-diffusion-1.5 \
|
||||
--resolution=512 \
|
||||
--learnable_property=object \
|
||||
--initializer_token='*' \
|
||||
--placeholder_token='<shraddha>' \
|
||||
--train_data_dir=/home/lstein/invokeai/text-inversion-training-data/shraddha \
|
||||
--output_dir=/home/lstein/invokeai/text-inversion-training/shraddha \
|
||||
--scale_lr \
|
||||
--train_batch_size=10 \
|
||||
--gradient_accumulation_steps=4 \
|
||||
--max_train_steps=2000 \
|
||||
--learning_rate=0.0005 \
|
||||
--lr_scheduler=constant \
|
||||
--mixed_precision=fp16 \
|
||||
--only_save_embeds
|
||||
```
|
||||
|
||||
## Using Embeddings
|
||||
|
||||
After training completes, the resultant embeddings will be saved into your `$INVOKEAI_ROOT/embeddings/<trigger word>/learned_embeds.bin`.
|
||||
|
@ -1,2 +1,2 @@
|
||||
|
||||
__version__='2.3.2.post1'
|
||||
__version__='2.3.3-rc1'
|
||||
|
@ -157,7 +157,7 @@ def _run_invoke(
|
||||
):
|
||||
pid = os.getpid()
|
||||
logdir.mkdir(parents=True, exist_ok=True)
|
||||
logfile = Path(logdir, f'{time.strftime("%Y-%m-%d-%H:%M:%S")}-pid={pid}.txt')
|
||||
logfile = Path(logdir, f'{time.strftime("%Y-%m-%d_%H-%M-%S")}-pid={pid}.txt')
|
||||
print(
|
||||
f">> Process {pid} running on GPU {gpu}; logging to {logfile}", file=sys.stderr
|
||||
)
|
||||
|
@ -30,14 +30,17 @@ class PngWriter:
|
||||
prefix = self._unused_prefix()
|
||||
else:
|
||||
with open(next_prefix_file,'r') as file:
|
||||
prefix=int(file.readline() or int(self._unused_prefix())-1)
|
||||
prefix+=1
|
||||
prefix = 0
|
||||
try:
|
||||
prefix=int(file.readline())
|
||||
except (TypeError, ValueError):
|
||||
prefix=self._unused_prefix()
|
||||
with open(next_prefix_file,'w') as file:
|
||||
file.write(str(prefix))
|
||||
file.write(str(prefix+1))
|
||||
return f'{prefix:06}'
|
||||
|
||||
# gives the next unique prefix in outdir
|
||||
def _unused_prefix(self):
|
||||
def _unused_prefix(self)->int:
|
||||
# sort reverse alphabetically until we find max+1
|
||||
dirlist = sorted(os.listdir(self.outdir), reverse=True)
|
||||
# find the first filename that matches our pattern or return 000000.0.png
|
||||
@ -45,8 +48,7 @@ class PngWriter:
|
||||
(f for f in dirlist if re.match('^(\d+)\..*\.png', f)),
|
||||
'0000000.0.png',
|
||||
)
|
||||
basecount = int(existing_name.split('.', 1)[0]) + 1
|
||||
return f'{basecount:06}'
|
||||
return int(existing_name.split('.', 1)[0]) + 1
|
||||
|
||||
# saves image named _image_ to outdir/name, writing metadata from prompt
|
||||
# returns full path of output
|
||||
|
@ -17,6 +17,7 @@ from pathlib import Path
|
||||
from typing import List, Tuple
|
||||
|
||||
import npyscreen
|
||||
from diffusers.utils.import_utils import is_xformers_available
|
||||
from npyscreen import widget
|
||||
from omegaconf import OmegaConf
|
||||
|
||||
@ -29,7 +30,7 @@ from ldm.invoke.training.textual_inversion_training import (
|
||||
TRAINING_DATA = "text-inversion-training-data"
|
||||
TRAINING_DIR = "text-inversion-output"
|
||||
CONF_FILE = "preferences.conf"
|
||||
|
||||
XFORMERS_AVAILABLE = is_xformers_available()
|
||||
|
||||
class textualInversionForm(npyscreen.FormMultiPageAction):
|
||||
resolutions = [512, 768, 1024]
|
||||
@ -178,7 +179,7 @@ class textualInversionForm(npyscreen.FormMultiPageAction):
|
||||
out_of=10000,
|
||||
step=500,
|
||||
lowest=1,
|
||||
value=saved_args.get("max_train_steps", 3000),
|
||||
value=saved_args.get("max_train_steps", 2500),
|
||||
scroll_exit=True,
|
||||
)
|
||||
self.train_batch_size = self.add_widget_intelligent(
|
||||
@ -187,7 +188,7 @@ class textualInversionForm(npyscreen.FormMultiPageAction):
|
||||
out_of=50,
|
||||
step=1,
|
||||
lowest=1,
|
||||
value=saved_args.get("train_batch_size", 8),
|
||||
value=saved_args.get("train_batch_size", 8 if XFORMERS_AVAILABLE else 3),
|
||||
scroll_exit=True,
|
||||
)
|
||||
self.gradient_accumulation_steps = self.add_widget_intelligent(
|
||||
@ -225,7 +226,7 @@ class textualInversionForm(npyscreen.FormMultiPageAction):
|
||||
self.enable_xformers_memory_efficient_attention = self.add_widget_intelligent(
|
||||
npyscreen.Checkbox,
|
||||
name="Use xformers acceleration",
|
||||
value=saved_args.get("enable_xformers_memory_efficient_attention", False),
|
||||
value=saved_args.get("enable_xformers_memory_efficient_attention", XFORMERS_AVAILABLE),
|
||||
scroll_exit=True,
|
||||
)
|
||||
self.lr_scheduler = self.add_widget_intelligent(
|
||||
@ -428,8 +429,7 @@ def do_front_end(args: Namespace):
|
||||
print(str(e))
|
||||
print("** DETAILS:")
|
||||
print(traceback.format_exc())
|
||||
|
||||
|
||||
|
||||
def main():
|
||||
args = parse_args()
|
||||
global_set_root(args.root_dir or Globals.root)
|
||||
|
@ -67,7 +67,7 @@ else:
|
||||
"nearest": PIL.Image.NEAREST,
|
||||
}
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
XFORMERS_AVAILABLE = is_xformers_available
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.10.0.dev0")
|
||||
@ -227,7 +227,7 @@ def parse_args():
|
||||
training_group.add_argument(
|
||||
"--train_batch_size",
|
||||
type=int,
|
||||
default=16,
|
||||
default=8 if XFORMERS_AVAILABLE else 3,
|
||||
help="Batch size (per device) for the training dataloader.",
|
||||
)
|
||||
training_group.add_argument("--num_train_epochs", type=int, default=100)
|
||||
@ -324,6 +324,7 @@ def parse_args():
|
||||
parser.add_argument(
|
||||
"--enable_xformers_memory_efficient_attention",
|
||||
action="store_true",
|
||||
default=XFORMERS_AVAILABLE,
|
||||
help="Whether or not to use xformers.",
|
||||
)
|
||||
|
||||
@ -536,7 +537,7 @@ def do_textual_inversion_training(
|
||||
seed: int = None,
|
||||
resolution: int = 512,
|
||||
center_crop: bool = False,
|
||||
train_batch_size: int = 16,
|
||||
train_batch_size: int = 4,
|
||||
num_train_epochs: int = 100,
|
||||
max_train_steps: int = 5000,
|
||||
gradient_accumulation_steps: int = 1,
|
||||
|
@ -70,7 +70,7 @@ dependencies = [
|
||||
"taming-transformers-rom1504",
|
||||
"test-tube>=0.7.5",
|
||||
"torch-fidelity",
|
||||
"torch>=1.13.1",
|
||||
"torch~=1.13.1",
|
||||
"torchmetrics",
|
||||
"torchvision>=0.14.1",
|
||||
"transformers~=4.26",
|
||||
|
Loading…
Reference in New Issue
Block a user