diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py
index e9b1425464..fad875cdbb 100644
--- a/invokeai/app/invocations/latent.py
+++ b/invokeai/app/invocations/latent.py
@@ -286,6 +286,10 @@ class TextToLatentsInvocation(BaseInvocation):
             )
         )
 
+        latents_shape = noise.shape
+        control_height_resize = latents_shape[2] * 8
+        control_width_resize = latents_shape[3] * 8
+
         # copied from old backend/txt2img.py
         # FIXME: still need to test with different widths, heights, devices, dtypes
         #        and add in batch_size, num_images_per_prompt?
@@ -295,10 +299,8 @@ class TextToLatentsInvocation(BaseInvocation):
                     image=control_image,
                     # do_classifier_free_guidance=do_classifier_free_guidance,
                     do_classifier_free_guidance=True,
-                    # width=width,
-                    # height=height,
-                    width=512,
-                    height=512,
+                    width=control_width_resize,
+                    height=control_height_resize,
                     # batch_size=batch_size * num_images_per_prompt,
                     # num_images_per_prompt=num_images_per_prompt,
                     device=control_model.device,
@@ -311,10 +313,8 @@ class TextToLatentsInvocation(BaseInvocation):
                         image=image_,
                         # do_classifier_free_guidance=do_classifier_free_guidance,
                         do_classifier_free_guidance=True,
-                        # width=width,
-                        # height=height,
-                        width=512,
-                        height=512,
+                        width=control_width_resize,
+                        height=control_height_resize,
                         # batch_size=batch_size * num_images_per_prompt,
                         # num_images_per_prompt=num_images_per_prompt,
                         device=control_model.device,
@@ -323,10 +323,7 @@ class TextToLatentsInvocation(BaseInvocation):
                     images.append(image_)
                 control_image = images
-
-        # TODO: Verify the noise is the right size
-
         result_latents, result_attention_map_saver = model.latents_from_embeddings(
             latents=torch.zeros_like(noise, dtype=torch_dtype(model.device)),
             noise=noise,
diff --git a/invokeai/backend/stable_diffusion/diffusers_pipeline.py b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
index 7d88b0e07a..e24f11fd3d 100644
--- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py
+++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
@@ -994,14 +994,16 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
         self,
         image,
         # FIXME: need to fix hardwiring of width and height, change to basing on latents dimensions?
-        width=512,
-        height=512,
+        # latents,
+        width=512,   # should be 8 * latent.shape[3]
+        height=512,  # should be 8 * latent.shape[2]
         batch_size=1,
         num_images_per_prompt=1,
         device="cuda",
         dtype=torch.float16,
         do_classifier_free_guidance=True,
     ):
+
         if not isinstance(image, torch.Tensor):
             if isinstance(image, PIL.Image.Image):
                 image = [image]
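
Note on the dimension math above (an illustration, not part of the patch): Stable Diffusion's VAE downsamples images by a factor of 8 in each spatial dimension, so a latent tensor of shape (batch, channels, H/8, W/8) corresponds to an H x W pixel image. The patch uses this to size the control image from the noise latents instead of hardcoding 512. A minimal sketch of the relationship, with made-up tensor sizes:

```python
import torch

# Assumed: SD's VAE spatial downsampling factor. This is the source of the
# hardcoded "* 8" in the patch.
LATENT_SCALE_FACTOR = 8

# Hypothetical noise latents for a 512x768 generation, NCHW layout:
# (batch, latent_channels, height // 8, width // 8)
noise = torch.randn(1, 4, 64, 96)

# Recover the pixel-space control image size from the latent dimensions,
# mirroring control_height_resize / control_width_resize in the patch.
control_height_resize = noise.shape[2] * LATENT_SCALE_FACTOR  # 64 * 8 = 512
control_width_resize = noise.shape[3] * LATENT_SCALE_FACTOR   # 96 * 8 = 768

assert (control_height_resize, control_width_resize) == (512, 768)
```

Deriving the size this way keeps the ControlNet conditioning aligned with the requested generation resolution: the residuals a ControlNet feeds into the UNet must match the latents' spatial dimensions, which only holds when the control image is exactly 8x the latent height and width. The previous hardcoded 512x512 was only correct for 512x512 generations.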