remove support for batch_size from dream.py (#227)

* remove dream.py support for batch_size
* expect to get a single image

parent b5565d2c82
commit 1714816fe2
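
With this change, dream.py always produces exactly one image per sampling pass: image count is controlled solely by `--iterations`/`-n`, which also gives every image its own seed, rather than by `--batch_size`/`-b`. Below is a minimal sketch of the resulting library usage; the bare `T2I()` construction and the output filenames are illustrative assumptions, not taken from the diff.

```python
# Hypothetical post-change usage: one image per iteration, one seed per image.
from ldm.simplet2i import T2I

t2i = T2I()  # model/config paths fall back to the defaults in the docstring below

# Four images with four distinct seeds; previously `-b4` produced four
# images that all shared a single seed.
results = t2i.prompt2image('a cute child playing hopscotch', iterations=4)
for image, seed in results:
    image.save(f'hopscotch.{seed}.png')
```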
@@ -297,13 +297,13 @@ face enhancement (see previous section):
 ```
 dream> a cute child playing hopscotch -G0.5
 [...]
-outputs/img-samples/000039.3498014304.png: "a cute child playing hopscotch" -s50 -b1 -W512 -H512 -C7.5 -mk_lms -S3498014304
+outputs/img-samples/000039.3498014304.png: "a cute child playing hopscotch" -s50 -W512 -H512 -C7.5 -mk_lms -S3498014304
 
 # I wonder what it will look like if I bump up the steps and set facial enhancement to full strength?
 dream> a cute child playing hopscotch -G1.0 -s100 -S -1
 reusing previous seed 3498014304
 [...]
-outputs/img-samples/000040.3498014304.png: "a cute child playing hopscotch" -G1.0 -s100 -b1 -W512 -H512 -C7.5 -mk_lms -S3498014304
+outputs/img-samples/000040.3498014304.png: "a cute child playing hopscotch" -G1.0 -s100 -W512 -H512 -C7.5 -mk_lms -S3498014304
 ```
 
 ## Weighted Prompts
@@ -117,7 +117,6 @@ class PromptFormatter:
         switches = list()
         switches.append(f'"{opt.prompt}"')
         switches.append(f'-s{opt.steps or t2i.steps}')
-        switches.append(f'-b{opt.batch_size or t2i.batch_size}')
         switches.append(f'-W{opt.width or t2i.width}')
         switches.append(f'-H{opt.height or t2i.height}')
         switches.append(f'-C{opt.cfg_scale or t2i.cfg_scale}')
@@ -89,7 +89,6 @@ if readline_available:
             '--steps','-s',
             '--seed','-S',
             '--iterations','-n',
-            '--batch_size','-b',
             '--width','-W','--height','-H',
             '--cfg_scale','-C',
             '--grid','-g',
@@ -140,8 +140,7 @@ class DreamServer(BaseHTTPRequestHandler):
            # since rendering images is moderately expensive, only render every 5th image
            # and don't bother with the last one, since it'll render anyway
            if progress_images and step % 5 == 0 and step < steps - 1:
-               images = self.model._samples_to_images(sample)
-               image = images[0]
+               image = self.model._sample_to_image(sample)
                step_writer.write_image(image, seed)  # TODO PngWriter to return path
                url = step_writer.filepath
                self.wfile.write(bytes(json.dumps(
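
A quick check of the throttling condition in the hunk above, showing which steps of a 50-step run actually produce a progress image:

```python
# With steps=50, intermediate renders happen at steps 0, 5, ..., 45;
# the `step < steps - 1` guard skips the last step, which renders anyway.
steps = 50
rendered = [step for step in range(steps) if step % 5 == 0 and step < steps - 1]
print(rendered)  # [0, 5, 10, 15, 20, 25, 30, 35, 40, 45]
```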
@@ -39,7 +39,6 @@ from ldm.simplet2i import T2I
 t2i = T2I(model       = <path>        // models/ldm/stable-diffusion-v1/model.ckpt
           config      = <path>        // configs/stable-diffusion/v1-inference.yaml
           iterations  = <integer>     // how many times to run the sampling (1)
-          batch_size  = <integer>     // how many images to generate per sampling (1)
           steps       = <integer>     // 50
           seed        = <integer>     // current system time
           sampler_name= ['ddim', 'k_dpm_2_a', 'k_dpm_2', 'k_euler_a', 'k_euler', 'k_heun', 'k_lms', 'plms']  // k_lms
@@ -98,7 +97,6 @@ class T2I:
         model
         config
         iterations
-        batch_size
         steps
         seed
         sampler_name
@@ -116,7 +114,6 @@ class T2I:
 
     def __init__(
         self,
-        batch_size=1,
         iterations=1,
         steps=50,
         seed=None,
@@ -138,7 +135,6 @@ class T2I:
         latent_diffusion_weights=False,
         device='cuda',
     ):
-        self.batch_size = batch_size
         self.iterations = iterations
         self.width = width
         self.height = height
@@ -174,9 +170,7 @@ class T2I:
         Optional named arguments are the same as those passed to T2I and prompt2image()
         """
         results = self.prompt2image(prompt, **kwargs)
-        pngwriter = PngWriter(
-            outdir, prompt, kwargs.get('batch_size', self.batch_size)
-        )
+        pngwriter = PngWriter(outdir, prompt)
         for r in results:
             pngwriter.write_image(r[0], r[1])
         return pngwriter.files_written
@@ -196,7 +190,6 @@ class T2I:
         self,
         # these are common
         prompt,
-        batch_size=None,
         iterations=None,
         steps=None,
         seed=None,
@@ -222,8 +215,7 @@ class T2I:
         ldm.prompt2image() is the common entry point for txt2img() and img2img()
         It takes the following arguments:
            prompt      // prompt string (no default)
-           iterations  // iterations (1); image count=iterations x batch_size
-           batch_size  // images per iteration (1)
+           iterations  // iterations (1); image count=iterations
            steps       // refinement steps per iteration
            seed        // seed for random number generator
            width       // width of image, in multiples of 64 (512)
@@ -258,7 +250,6 @@ class T2I:
         height = height or self.height
         cfg_scale = cfg_scale or self.cfg_scale
         ddim_eta = ddim_eta or self.ddim_eta
-        batch_size = batch_size or self.batch_size
         iterations = iterations or self.iterations
         strength = strength or self.strength
         self.log_tokenization = log_tokenization
@@ -297,7 +288,6 @@ class T2I:
                 images_iterator = self._img2img(
                     prompt,
                     precision_scope=scope,
-                    batch_size=batch_size,
                     steps=steps,
                     cfg_scale=cfg_scale,
                     ddim_eta=ddim_eta,
@@ -312,7 +302,6 @@ class T2I:
                 images_iterator = self._txt2img(
                     prompt,
                     precision_scope=scope,
-                    batch_size=batch_size,
                     steps=steps,
                     cfg_scale=cfg_scale,
                     ddim_eta=ddim_eta,
@@ -325,11 +314,10 @@ class T2I:
             with scope(self.device.type), self.model.ema_scope():
                 for n in trange(iterations, desc='Generating'):
                     seed_everything(seed)
-                    iter_images = next(images_iterator)
-                    for image in iter_images:
-                        results.append([image, seed])
-                        if image_callback is not None:
-                            image_callback(image, seed)
+                    image = next(images_iterator)
+                    results.append([image, seed])
+                    if image_callback is not None:
+                        image_callback(image, seed)
                     seed = self._new_seed()
 
             if upscale is not None or gfpgan_strength > 0:
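
The loop above now seeds the RNG, pulls exactly one image from the iterator, fires the callback once, and advances to a fresh seed. A hedged sketch of an `image_callback` under that contract (assuming a `t2i` instance as in the first example; the output path is illustrative):

```python
# The callback now fires exactly once per iteration/seed, never per batch member.
def on_image(image, seed):
    image.save(f'out/{seed}.png')

t2i.prompt2image(
    'a cute child playing hopscotch',
    iterations=3,
    image_callback=on_image,
)
```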
@@ -399,7 +387,6 @@ class T2I:
         self,
         prompt,
         precision_scope,
-        batch_size,
         steps,
         cfg_scale,
         ddim_eta,
@@ -415,16 +402,16 @@ class T2I:
         sampler = self.sampler
 
         while True:
-            uc, c = self._get_uc_and_c(prompt, batch_size, skip_normalize)
+            uc, c = self._get_uc_and_c(prompt, skip_normalize)
             shape = [
                 self.latent_channels,
                 height // self.downsampling_factor,
                 width // self.downsampling_factor,
             ]
             samples, _ = sampler.sample(
+                batch_size=1,
                 S=steps,
                 conditioning=c,
-                batch_size=batch_size,
                 shape=shape,
                 verbose=False,
                 unconditional_guidance_scale=cfg_scale,
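
Since `sampler.sample()` is now pinned to `batch_size=1`, the `shape` list above describes a single latent sample. A worked example using the standard Stable Diffusion v1 constants (`latent_channels=4`, `downsampling_factor=8`):

```python
# Latent shape for a single 512x512 sample under SD v1 constants.
latent_channels, downsampling_factor = 4, 8
width, height = 512, 512
shape = [latent_channels, height // downsampling_factor, width // downsampling_factor]
print(shape)  # [4, 64, 64]
```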
@@ -432,14 +419,13 @@ class T2I:
                 eta=ddim_eta,
                 img_callback=callback
             )
-            yield self._samples_to_images(samples)
+            yield self._sample_to_image(samples)
 
     @torch.no_grad()
     def _img2img(
         self,
         prompt,
         precision_scope,
-        batch_size,
         steps,
         cfg_scale,
         ddim_eta,
@@ -464,7 +450,6 @@ class T2I:
         sampler = self.sampler
 
         init_image = self._load_img(init_img,width,height).to(self.device)
-        init_image = repeat(init_image, '1 ... -> b ...', b=batch_size)
         with precision_scope(self.device.type):
             init_latent = self.model.get_first_stage_encoding(
                 self.model.encode_first_stage(init_image)
@@ -478,11 +463,11 @@ class T2I:
         # print(f"target t_enc is {t_enc} steps")
 
         while True:
-            uc, c = self._get_uc_and_c(prompt, batch_size, skip_normalize)
+            uc, c = self._get_uc_and_c(prompt, skip_normalize)
 
             # encode (scaled latent)
             z_enc = sampler.stochastic_encode(
-                init_latent, torch.tensor([t_enc] * batch_size).to(self.device)
+                init_latent, torch.tensor([t_enc]).to(self.device)
             )
             # decode it
             samples = sampler.decode(
@@ -493,12 +478,12 @@ class T2I:
                 unconditional_guidance_scale=cfg_scale,
                 unconditional_conditioning=uc,
             )
-            yield self._samples_to_images(samples)
+            yield self._sample_to_image(samples)
 
     # TODO: does this actually need to run every loop? does anything in it vary by random seed?
-    def _get_uc_and_c(self, prompt, batch_size, skip_normalize):
+    def _get_uc_and_c(self, prompt, skip_normalize):
 
-        uc = self.model.get_learned_conditioning(batch_size * [''])
+        uc = self.model.get_learned_conditioning([''])
 
         # weighted sub-prompts
         subprompts, weights = T2I._split_weighted_subprompts(prompt)
@@ -515,27 +500,23 @@ class T2I:
                     self._log_tokenization(subprompts[i])
                     c = torch.add(
                         c,
-                        self.model.get_learned_conditioning(
-                            batch_size * [subprompts[i]]
-                        ),
+                        self.model.get_learned_conditioning([subprompts[i]]),
                         alpha=weight,
                     )
         else: # just standard 1 prompt
             self._log_tokenization(prompt)
-            c = self.model.get_learned_conditioning(batch_size * [prompt])
+            c = self.model.get_learned_conditioning([prompt])
         return (uc, c)
 
-    def _samples_to_images(self, samples):
+    def _sample_to_image(self, samples):
         x_samples = self.model.decode_first_stage(samples)
         x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0)
-        images = list()
-        for x_sample in x_samples:
-            x_sample = 255.0 * rearrange(
-                x_sample.cpu().numpy(), 'c h w -> h w c'
-            )
-            image = Image.fromarray(x_sample.astype(np.uint8))
-            images.append(image)
-        return images
+        if len(x_samples) != 1:
+            raise Exception(f'expected to get a single image, but got {len(x_samples)}')
+        x_sample = 255.0 * rearrange(
+            x_samples[0].cpu().numpy(), 'c h w -> h w c'
+        )
+        return Image.fromarray(x_sample.astype(np.uint8))
 
     def _new_seed(self):
         self.seed = random.randrange(0, np.iinfo(np.uint32).max)
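
The rewritten `_sample_to_image()` decodes one latent sample, insists that exactly one image came back, and converts it to PIL. A standalone sketch of that conversion, substituting a dummy tensor for the `decode_first_stage()` output:

```python
import numpy as np
import torch
from einops import rearrange
from PIL import Image

# Dummy stand-in for model.decode_first_stage(samples): batch of 1, CHW, in [-1, 1].
decoded = torch.randn(1, 3, 512, 512)
x_samples = torch.clamp((decoded + 1.0) / 2.0, min=0.0, max=1.0)
assert len(x_samples) == 1  # the method raises if the batch holds more than one image
x_sample = 255.0 * rearrange(x_samples[0].cpu().numpy(), 'c h w -> h w c')
image = Image.fromarray(x_sample.astype(np.uint8))  # 512x512 RGB PIL image
```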
@@ -199,7 +199,7 @@ def main_loop(t2i, outdir, prompt_as_dir, parser, infile):
 
         # Here is where the images are actually generated!
         try:
-            file_writer = PngWriter(current_outdir, normalized_prompt, opt.batch_size)
+            file_writer = PngWriter(current_outdir, normalized_prompt)
             callback = file_writer.write_image if individual_images else None
             image_list = t2i.prompt2image(image_callback=callback, **vars(opt))
             results = (
@@ -419,13 +419,6 @@ def create_cmd_parser():
         default=1,
         help='Number of samplings to perform (slower, but will provide seeds for individual images)',
     )
-    parser.add_argument(
-        '-b',
-        '--batch_size',
-        type=int,
-        default=1,
-        help='Number of images to produce per sampling (will not provide seeds for individual images!)',
-    )
     parser.add_argument(
         '-W', '--width', type=int, help='Image width, multiple of 64'
     )