remove support for batch_size from dream.py (#227)

* remove dream.py support for batch_size

* expect to get a single image
Kevin Gibbons 2022-08-30 19:30:12 -07:00 committed by GitHub
parent b5565d2c82
commit 1714816fe2
6 changed files with 27 additions and 56 deletions
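
In effect, every sampling call now yields exactly one image, so image count per prompt is controlled solely by `iterations`, with a distinct seed recorded for each image. A hypothetical caller-side sketch of the `T2I` API after this change (constructor arguments elided; real callers pass `model` and `config` paths):

```
from ldm.simplet2i import T2I

t2i = T2I()  # hypothetical: real usage passes model= and config= paths
# batch_size is gone: four images now means four sampling passes,
# each returned as a [PIL.Image, seed] pair with its own seed
results = t2i.prompt2image('a cute child playing hopscotch', iterations=4)
for image, seed in results:
    image.save(f'hopscotch-{seed}.png')
```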

View File

@@ -297,13 +297,13 @@ face enhancement (see previous section):
```
dream> a cute child playing hopscotch -G0.5
[...]
-outputs/img-samples/000039.3498014304.png: "a cute child playing hopscotch" -s50 -b1 -W512 -H512 -C7.5 -mk_lms -S3498014304
+outputs/img-samples/000039.3498014304.png: "a cute child playing hopscotch" -s50 -W512 -H512 -C7.5 -mk_lms -S3498014304
# I wonder what it will look like if I bump up the steps and set facial enhancement to full strength?
dream> a cute child playing hopscotch -G1.0 -s100 -S -1
reusing previous seed 3498014304
[...]
-outputs/img-samples/000040.3498014304.png: "a cute child playing hopscotch" -G1.0 -s100 -b1 -W512 -H512 -C7.5 -mk_lms -S3498014304
+outputs/img-samples/000040.3498014304.png: "a cute child playing hopscotch" -G1.0 -s100 -W512 -H512 -C7.5 -mk_lms -S3498014304
```
## Weighted Prompts

View File

@@ -117,7 +117,6 @@ class PromptFormatter:
switches = list()
switches.append(f'"{opt.prompt}"')
switches.append(f'-s{opt.steps or t2i.steps}')
-switches.append(f'-b{opt.batch_size or t2i.batch_size}')
switches.append(f'-W{opt.width or t2i.width}')
switches.append(f'-H{opt.height or t2i.height}')
switches.append(f'-C{opt.cfg_scale or t2i.cfg_scale}')

View File

@@ -89,7 +89,6 @@ if readline_available:
'--steps','-s',
'--seed','-S',
'--iterations','-n',
-'--batch_size','-b',
'--width','-W','--height','-H',
'--cfg_scale','-C',
'--grid','-g',

View File

@@ -140,8 +140,7 @@ class DreamServer(BaseHTTPRequestHandler):
# since rendering images is moderately expensive, only render every 5th image
# and don't bother with the last one, since it'll render anyway
if progress_images and step % 5 == 0 and step < steps - 1:
-images = self.model._samples_to_images(sample)
-image = images[0]
+image = self.model._sample_to_image(sample)
step_writer.write_image(image, seed) # TODO PngWriter to return path
url = step_writer.filepath
self.wfile.write(bytes(json.dumps(
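
Since `_sample_to_image` converts one latent to one preview image, the progress path no longer indexes into a list. A condensed sketch of the handler shape after this change (the callback wiring and the `model`, `step_writer`, `seed`, `progress_images`, and `steps` bindings are assumed from the surrounding server code):

```
def on_progress(sample, step):
    # render every 5th step and skip the last, which renders anyway
    if progress_images and step % 5 == 0 and step < steps - 1:
        image = model._sample_to_image(sample)  # a single PIL image
        step_writer.write_image(image, seed)
```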

View File

@@ -39,7 +39,6 @@ from ldm.simplet2i import T2I
t2i = T2I(model = <path> // models/ldm/stable-diffusion-v1/model.ckpt
config = <path> // configs/stable-diffusion/v1-inference.yaml
iterations = <integer> // how many times to run the sampling (1)
-batch_size = <integer> // how many images to generate per sampling (1)
steps = <integer> // 50
seed = <integer> // current system time
sampler_name= ['ddim', 'k_dpm_2_a', 'k_dpm_2', 'k_euler_a', 'k_euler', 'k_heun', 'k_lms', 'plms'] // k_lms
@@ -98,7 +97,6 @@ class T2I:
model
config
iterations
-batch_size
steps
seed
sampler_name
@@ -116,7 +114,6 @@ class T2I:
def __init__(
self,
-batch_size=1,
iterations=1,
steps=50,
seed=None,
@@ -138,7 +135,6 @@
latent_diffusion_weights=False,
device='cuda',
):
-self.batch_size = batch_size
self.iterations = iterations
self.width = width
self.height = height
@@ -174,9 +170,7 @@
Optional named arguments are the same as those passed to T2I and prompt2image()
"""
results = self.prompt2image(prompt, **kwargs)
-pngwriter = PngWriter(
-outdir, prompt, kwargs.get('batch_size', self.batch_size)
-)
+pngwriter = PngWriter(outdir, prompt)
for r in results:
pngwriter.write_image(r[0], r[1])
return pngwriter.files_written
@@ -196,7 +190,6 @@ class T2I:
self,
# these are common
prompt,
-batch_size=None,
iterations=None,
steps=None,
seed=None,
@@ -222,8 +215,7 @@ class T2I:
ldm.prompt2image() is the common entry point for txt2img() and img2img()
It takes the following arguments:
prompt // prompt string (no default)
-iterations // iterations (1); image count=iterations x batch_size
-batch_size // images per iteration (1)
+iterations // iterations (1); image count=iterations
steps // refinement steps per iteration
seed // seed for random number generator
width // width of image, in multiples of 64 (512)
@@ -258,7 +250,6 @@ class T2I:
height = height or self.height
cfg_scale = cfg_scale or self.cfg_scale
ddim_eta = ddim_eta or self.ddim_eta
-batch_size = batch_size or self.batch_size
iterations = iterations or self.iterations
strength = strength or self.strength
self.log_tokenization = log_tokenization
@@ -297,7 +288,6 @@ class T2I:
images_iterator = self._img2img(
prompt,
precision_scope=scope,
-batch_size=batch_size,
steps=steps,
cfg_scale=cfg_scale,
ddim_eta=ddim_eta,
@@ -312,7 +302,6 @@ class T2I:
images_iterator = self._txt2img(
prompt,
precision_scope=scope,
-batch_size=batch_size,
steps=steps,
cfg_scale=cfg_scale,
ddim_eta=ddim_eta,
@@ -325,11 +314,10 @@
with scope(self.device.type), self.model.ema_scope():
for n in trange(iterations, desc='Generating'):
seed_everything(seed)
-iter_images = next(images_iterator)
-for image in iter_images:
-results.append([image, seed])
-if image_callback is not None:
-image_callback(image, seed)
+image = next(images_iterator)
+results.append([image, seed])
+if image_callback is not None:
+image_callback(image, seed)
seed = self._new_seed()
if upscale is not None or gfpgan_strength > 0:
@@ -399,7 +387,6 @@ class T2I:
self,
prompt,
precision_scope,
-batch_size,
steps,
cfg_scale,
ddim_eta,
@@ -415,16 +402,16 @@ class T2I:
sampler = self.sampler
while True:
-uc, c = self._get_uc_and_c(prompt, batch_size, skip_normalize)
+uc, c = self._get_uc_and_c(prompt, skip_normalize)
shape = [
self.latent_channels,
height // self.downsampling_factor,
width // self.downsampling_factor,
]
samples, _ = sampler.sample(
+batch_size=1,
S=steps,
conditioning=c,
-batch_size=batch_size,
shape=shape,
verbose=False,
unconditional_guidance_scale=cfg_scale,
@@ -432,14 +419,13 @@ class T2I:
eta=ddim_eta,
img_callback=callback
)
-yield self._samples_to_images(samples)
+yield self._sample_to_image(samples)
@torch.no_grad()
def _img2img(
self,
prompt,
precision_scope,
-batch_size,
steps,
cfg_scale,
ddim_eta,
@@ -464,7 +450,6 @@ class T2I:
sampler = self.sampler
init_image = self._load_img(init_img,width,height).to(self.device)
-init_image = repeat(init_image, '1 ... -> b ...', b=batch_size)
with precision_scope(self.device.type):
init_latent = self.model.get_first_stage_encoding(
self.model.encode_first_stage(init_image)
@@ -478,11 +463,11 @@ class T2I:
# print(f"target t_enc is {t_enc} steps")
while True:
-uc, c = self._get_uc_and_c(prompt, batch_size, skip_normalize)
+uc, c = self._get_uc_and_c(prompt, skip_normalize)
# encode (scaled latent)
z_enc = sampler.stochastic_encode(
-init_latent, torch.tensor([t_enc] * batch_size).to(self.device)
+init_latent, torch.tensor([t_enc]).to(self.device)
)
# decode it
samples = sampler.decode(
@@ -493,12 +478,12 @@ class T2I:
unconditional_guidance_scale=cfg_scale,
unconditional_conditioning=uc,
)
-yield self._samples_to_images(samples)
+yield self._sample_to_image(samples)
# TODO: does this actually need to run every loop? does anything in it vary by random seed?
-def _get_uc_and_c(self, prompt, batch_size, skip_normalize):
+def _get_uc_and_c(self, prompt, skip_normalize):
-uc = self.model.get_learned_conditioning(batch_size * [''])
+uc = self.model.get_learned_conditioning([''])
# weighted sub-prompts
subprompts, weights = T2I._split_weighted_subprompts(prompt)
@@ -515,27 +500,23 @@ class T2I:
self._log_tokenization(subprompts[i])
c = torch.add(
c,
-self.model.get_learned_conditioning(
-batch_size * [subprompts[i]]
-),
+self.model.get_learned_conditioning([subprompts[i]]),
alpha=weight,
)
else: # just standard 1 prompt
self._log_tokenization(prompt)
-c = self.model.get_learned_conditioning(batch_size * [prompt])
+c = self.model.get_learned_conditioning([prompt])
return (uc, c)
-def _samples_to_images(self, samples):
+def _sample_to_image(self, samples):
x_samples = self.model.decode_first_stage(samples)
x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0)
-images = list()
-for x_sample in x_samples:
-x_sample = 255.0 * rearrange(
-x_sample.cpu().numpy(), 'c h w -> h w c'
-)
-image = Image.fromarray(x_sample.astype(np.uint8))
-images.append(image)
-return images
+if len(x_samples) != 1:
+raise Exception(f'expected to get a single image, but got {len(x_samples)}')
+x_sample = 255.0 * rearrange(
+x_samples[0].cpu().numpy(), 'c h w -> h w c'
+)
+return Image.fromarray(x_sample.astype(np.uint8))
def _new_seed(self):
self.seed = random.randrange(0, np.iinfo(np.uint32).max)
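
The renamed `_sample_to_image` enforces the one-image invariant instead of silently looping over a batch. A self-contained sketch of that decode path, assuming `samples` is a latent tensor with a batch dimension of 1 (a free function stands in for the method):

```
import numpy as np
import torch
from PIL import Image
from einops import rearrange

def sample_to_image(model, samples):
    # decode the latent, then map pixel values from [-1, 1] to [0, 1]
    x_samples = model.decode_first_stage(samples)
    x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0)
    if len(x_samples) != 1:
        raise Exception(f'expected to get a single image, but got {len(x_samples)}')
    # CHW float tensor -> HWC uint8 array -> PIL image
    x_sample = 255.0 * rearrange(x_samples[0].cpu().numpy(), 'c h w -> h w c')
    return Image.fromarray(x_sample.astype(np.uint8))
```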

View File

@@ -199,7 +199,7 @@ def main_loop(t2i, outdir, prompt_as_dir, parser, infile):
# Here is where the images are actually generated!
try:
-file_writer = PngWriter(current_outdir, normalized_prompt, opt.batch_size)
+file_writer = PngWriter(current_outdir, normalized_prompt)
callback = file_writer.write_image if individual_images else None
image_list = t2i.prompt2image(image_callback=callback, **vars(opt))
results = (
@@ -419,13 +419,6 @@ def create_cmd_parser():
default=1,
help='Number of samplings to perform (slower, but will provide seeds for individual images)',
)
-parser.add_argument(
-'-b',
-'--batch_size',
-type=int,
-default=1,
-help='Number of images to produce per sampling (will not provide seeds for individual images!)',
-)
parser.add_argument(
'-W', '--width', type=int, help='Image width, multiple of 64'
)
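
At the prompt, the removed `-b`/`--batch_size` flag no longer parses; multiple images per prompt now come from `-n`/`--iterations`, which is slower but records a seed for every image:

```
dream> a cute child playing hopscotch -n4
```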