implemented multiprocessing across multiple GPUs

Lincoln Stein 2023-03-05 01:52:28 -05:00
parent 6d0e782d71
commit 45aa770cd1
5 changed files with 128 additions and 39 deletions
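
In outline: the batch runner now fans generation commands out over a multiprocessing Pipe to one spawned worker per CUDA device (times --processes_per_gpu), with each worker pinned to its card through CUDA_VISIBLE_DEVICES before the CLI starts. A minimal, self-contained sketch of that fan-out pattern (the worker body, command list, and GPU count here are placeholders, not the InvokeAI code in the diff below):

import os
import sys
from multiprocessing import Pipe, Process, set_start_method

def worker(conn, gpu: int) -> None:
    # Pin this process to one device before any CUDA library is imported.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
    while True:
        try:
            command = conn.recv()      # block until the parent sends a command
        except EOFError:               # parent closed its end: no more work
            break
        print(f"[GPU {gpu}] would run: {command}", file=sys.stderr)

if __name__ == "__main__":
    set_start_method("spawn")          # 'spawn' is required for CUDA-using children
    gpu_count = 2                      # placeholder; the commit uses torch.cuda.device_count()
    parent_conn, child_conn = Pipe()
    workers = [Process(target=worker, args=(child_conn, i)) for i in range(gpu_count)]
    for p in workers:
        p.start()
    child_conn.close()                 # parent keeps only its own end of the pipe
    for command in ('"a prompt" -S42', '"another prompt" -S43'):
        parent_conn.send(command)      # whichever idle worker reads next takes the command
    parent_conn.close()                # workers now see EOFError and exit
    for p in workers:
        p.join()

Sharing the child end of the Pipe between workers gives simple pull-based load balancing: whichever worker reads next takes the next command, and closing the parent end signals end-of-input to every worker at once.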

View File

@@ -200,6 +200,8 @@ class Generate:
# it wasn't actually doing anything. This logic could be reinstated.
self.device = torch.device(choose_torch_device())
print(f">> Using device_type {self.device.type}")
if self.device.type == 'cuda':
print(f">> CUDA device '{torch.cuda.get_device_name(torch.cuda.current_device())}' (GPU {os.environ.get('CUDA_VISIBLE_DEVICES') or 0})")
if full_precision:
if self.precision != "auto":
raise ValueError("Remove --full_precision / -F if using --precision")

View File

@@ -389,6 +389,7 @@ def main_loop(gen, opt):
prior_variations,
postprocessed,
first_seed,
gen.model_name,
)
path = file_writer.save_image_and_prompt_to_png(
image=image,
@@ -402,6 +403,7 @@ def main_loop(gen, opt):
else first_seed
],
model_hash=gen.model_hash,
model_id=gen.model_name,
),
name=filename,
compress_level=opt.png_compression,
@@ -941,13 +943,14 @@ def add_postprocessing_to_metadata(opt, original_file, new_file, tool, command):
def prepare_image_metadata(
opt,
prefix,
seed,
operation="generate",
prior_variations=[],
postprocessed=False,
first_seed=None,
opt,
prefix,
seed,
operation="generate",
prior_variations=[],
postprocessed=False,
first_seed=None,
model_id='unknown',
):
if postprocessed and opt.save_original:
filename = choose_postprocess_name(opt, prefix, seed)
@@ -955,7 +958,9 @@ def prepare_image_metadata(
wildcards = dict(opt.__dict__)
wildcards["prefix"] = prefix
wildcards["seed"] = seed
wildcards["model_id"] = model_id
try:
print(f'DEBUG: fnformat={opt.fnformat}')
filename = opt.fnformat.format(**wildcards)
except KeyError as e:
print(
@@ -972,18 +977,17 @@ def prepare_image_metadata(
first_seed = first_seed or seed
this_variation = [[seed, opt.variation_amount]]
opt.with_variations = prior_variations + this_variation
formatted_dream_prompt = opt.dream_prompt_str(seed=first_seed)
formatted_dream_prompt = opt.dream_prompt_str(seed=first_seed,model_id=model_id)
elif len(prior_variations) > 0:
formatted_dream_prompt = opt.dream_prompt_str(seed=first_seed)
formatted_dream_prompt = opt.dream_prompt_str(seed=first_seed,model_id=model_id)
elif operation == "postprocess":
formatted_dream_prompt = "!fix " + opt.dream_prompt_str(
seed=seed, prompt=opt.input_file_path
seed=seed, prompt=opt.input_file_path, model_id=model_id,
)
else:
formatted_dream_prompt = opt.dream_prompt_str(seed=seed)
formatted_dream_prompt = opt.dream_prompt_str(seed=seed,model_id=model_id)
return filename, formatted_dream_prompt
def choose_postprocess_name(opt, prefix, seed) -> str:
match = re.search("postprocess:(\w+)", opt.last_operation)
if match:

View File

@@ -333,7 +333,7 @@ class Args(object):
switches.append(f'-V {formatted_variations}')
if 'variations' in a and len(a['variations'])>0:
switches.append(f'-V {a["variations"]}')
return ' '.join(switches)
return ' '.join(switches) + f' # model_id={kwargs.get("model_id","unknown model")}'
def __getattribute__(self,name):
'''
@@ -878,7 +878,7 @@ class Args(object):
)
render_group.add_argument(
'--fnformat',
default='{prefix}.{seed}.png',
default=None,
type=str,
help='Overwrite the filename format. You can use any argument as wildcard enclosed in curly braces. Default is {prefix}.{seed}.png',
)
@@ -1155,6 +1155,7 @@ def format_metadata(**kwargs):
def metadata_dumps(opt,
seeds=[],
model_hash=None,
model_id=None,
postprocessing=None):
'''
Given an Args object, returns a dict containing the keys and
@@ -1167,7 +1168,7 @@ def metadata_dumps(opt,
# top-level metadata minus `image` or `images`
metadata = {
'model' : 'stable diffusion',
'model_id' : opt.model,
'model_id' : model_id or opt.model,
'model_hash' : model_hash,
'app_id' : ldm.invoke.__app_id__,
'app_version' : ldm.invoke.__version__,

View File

@@ -108,8 +108,6 @@ class PromptFormatter:
switches.append(f'-H{opt.height or t2i.height}')
switches.append(f'-C{opt.cfg_scale or t2i.cfg_scale}')
switches.append(f'-A{opt.sampler_name or t2i.sampler_name}')
# to do: put model name into the t2i object
# switches.append(f'--model{t2i.model_name}')
if opt.seamless or t2i.seamless:
switches.append('--seamless')
if opt.init_img:

View File

@@ -8,17 +8,19 @@ that scan across steps and other parameters.
import argparse
import io
import json
import os
import pydoc
import re
import shutil
import sys
import numpy as np
from dataclasses import dataclass
from io import TextIOBase
from itertools import product
from pathlib import Path
from multiprocessing import Process, Pipe
from multiprocessing.connection import Connection
from subprocess import PIPE, Popen
from typing import Iterable, List, Union
from typing import Iterable, List
import yaml
from omegaconf import OmegaConf, dictconfig, listconfig
@@ -29,6 +31,7 @@ def expand_prompts(
run_invoke: bool = False,
invoke_model: str = None,
invoke_outdir: Path = None,
processes_per_gpu: int = 1
):
"""
:param template_file: A YAML file containing templated prompts and args
@@ -42,24 +45,98 @@ def expand_prompts(
conf = OmegaConf.load(fh)
else:
conf = OmegaConf.load(template_file)
# loading here to avoid long wait for help message
import torch
torch.multiprocessing.set_start_method('spawn')
gpu_count = torch.cuda.device_count() if torch.cuda.is_available() else 1
commands = expanded_invokeai_commands(conf, run_invoke)
children = list()
try:
if run_invoke:
invokeai_args = [shutil.which("invokeai")]
invokeai_args = [shutil.which("invokeai"),"--from_file","-"]
if invoke_model:
invokeai_args.extend(("--model", invoke_model))
if invoke_outdir:
invokeai_args.extend(("--outdir", invoke_outdir))
print(f"Calling invokeai with arguments {invokeai_args}", file=sys.stderr)
process = Popen(invokeai_args, stdin=PIPE, text=True)
with process.stdin as fh:
_do_expand(conf, file=fh)
process.wait()
invokeai_args.extend(("--outdir", os.path.expanduser(invoke_outdir)))
processes_to_launch = gpu_count * processes_per_gpu
print(f'>> Spawning {processes_to_launch} invokeai processes across {gpu_count} CUDA gpus', file=sys.stderr)
import ldm.invoke.CLI
parent_conn, child_conn = Pipe()
children = set()
for i in range(processes_to_launch):
p = Process(target=_run_invoke,
args=(child_conn,
parent_conn,
invokeai_args,
i%gpu_count,
)
)
p.start()
children.add(p)
child_conn.close()
sequence = 0
for command in commands:
sequence += 1
parent_conn.send(command+f' --fnformat=dp.{sequence:04}.{{prompt}}.png')
parent_conn.close()
else:
_do_expand(conf)
for command in commands:
print(command)
except KeyboardInterrupt:
process.kill()
for p in children:
p.terminate()
class MessageToStdin(object):
def __init__(self, connection: Connection):
self.connection = connection
self.linebuffer = list()
def readline(self)->str:
try:
if len(self.linebuffer) == 0:
message = self.connection.recv()
self.linebuffer = message.split("\n")
result = self.linebuffer.pop(0)
return result
except EOFError:
return None
class FilterStream(object):
def __init__(self, stream: TextIOBase, include: re.Pattern=None, exclude: re.Pattern=None):
self.stream = stream
self.include = include
self.exclude = exclude
def write(self, data: str):
if self.include and self.include.match(data):
self.stream.write(data)
self.stream.flush()
elif self.exclude and not self.exclude.match(data):
self.stream.write(data)
self.stream.flush()
def flush(self):
self.stream.flush()
def _run_invoke(conn_in: Connection, conn_out: Connection, args: List[str], gpu: int=0):
print(f'>> Process {os.getpid()} running on GPU {gpu}', file=sys.stderr)
conn_out.close()
os.environ['CUDA_VISIBLE_DEVICES'] = f"{gpu}"
from ldm.invoke.CLI import main
sys.argv = args
sys.stdin = MessageToStdin(conn_in)
sys.stdout = FilterStream(sys.stdout,include=re.compile('^\[\d+\]'))
sys.stderr = FilterStream(sys.stdout,exclude=re.compile('^(>>|\s*\d+%|Fetching)'))
main()
def _filter_output(stream: TextIOBase):
while line := stream.readline():
if re.match('^\[\d+\]',line):
print(line)
def main():
parser = argparse.ArgumentParser(
description=HELP,
@@ -88,12 +165,12 @@ def main():
dest="instructions",
action="store_true",
default=False,
help=f"Print verbose instructions.",
help="Print verbose instructions.",
)
parser.add_argument(
"--invoke",
action="store_true",
help="Execute invokeai using specified optional --model and --outdir",
help="Execute invokeai using specified optional --model, --processes_per_gpu and --outdir",
)
parser.add_argument(
"--model",
@@ -102,6 +179,12 @@ def main():
parser.add_argument(
"--outdir", type=Path, help="Write images and log into indicated directory"
)
parser.add_argument(
"--processes_per_gpu",
type=int,
default=1,
help="When executing invokeai, how many parallel processes to execute per CUDA GPU.",
)
opt = parser.parse_args()
if opt.example:
@@ -125,9 +208,10 @@ def main():
run_invoke=opt.invoke,
invoke_model=opt.model,
invoke_outdir=opt.outdir,
processes_per_gpu=opt.processes_per_gpu,
)
def _do_expand(conf: OmegaConf, file: TextIOBase = sys.stdout):
def expanded_invokeai_commands(conf: OmegaConf, always_switch_models: bool=False)->List[List[str]]:
models = expand_values(conf.get("model"))
steps = expand_values(conf.get("steps")) or [30]
cfgs = expand_values(conf.get("cfg")) or [7.5]
@@ -144,17 +228,17 @@ def _do_expand(conf: OmegaConf, file: TextIOBase = sys.stdout):
*[models, seeds, prompts, samplers, cfgs, steps, perlin, threshold, init_img, strength, dimensions]
)
previous_model = None
result = list()
for p in cross_product:
(model, seed, prompt, sampler, cfg, step, perlin, threshold, init_img, strength, dimensions) = tuple(p)
(width, height) = dimensions.split("x")
if previous_model != model:
previous_model = model
print(f"!switch {model}", file=file)
switch_args = f"!switch {model}\n" if always_switch_models or previous_model != model else ''
image_args = f'-I{init_img} -f{strength}' if init_img else ''
print(
f'"{prompt}" -S{seed} -A{sampler} -C{cfg} -s{step} {image_args} --perlin={perlin} --threshold={threshold} -W{width} -H{height}',
file=file,
)
command = f'{switch_args}{prompt} -S{seed} -A{sampler} -C{cfg} -s{step} {image_args} --perlin={perlin} --threshold={threshold} -W{width} -H{height}'
result.append(command)
previous_model = model
return result
def expand_prompt(
@@ -215,7 +299,7 @@ def _yaml_to_json(yaml_input: str) -> str:
return json.dumps(data, indent=2)
HELP = f"""
HELP = """
This script takes a prompt template file that contains multiple
alternative values for the prompt and its generation arguments (such
as steps). It then expands out the prompts using all combinations of