diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py
index f0ad850bb3..35e05934ff 100644
--- a/invokeai/app/invocations/latent.py
+++ b/invokeai/app/invocations/latent.py
@@ -366,6 +366,10 @@ class TextToLatentsInvocation(BaseInvocation):
                 )
             )
 
+        latents_shape = noise.shape
+        control_height_resize = latents_shape[2] * 8
+        control_width_resize = latents_shape[3] * 8
+
         # copied from old backend/txt2img.py
         # FIXME: still need to test with different widths, heights, devices, dtypes
         #        and add in batch_size, num_images_per_prompt?
@@ -375,10 +379,8 @@ class TextToLatentsInvocation(BaseInvocation):
                 image=control_image,
                 # do_classifier_free_guidance=do_classifier_free_guidance,
                 do_classifier_free_guidance=True,
-                # width=width,
-                # height=height,
-                width=512,
-                height=512,
+                width=control_width_resize,
+                height=control_height_resize,
                 # batch_size=batch_size * num_images_per_prompt,
                 # num_images_per_prompt=num_images_per_prompt,
                 device=control_model.device,
@@ -391,10 +393,8 @@ class TextToLatentsInvocation(BaseInvocation):
                     image=image_,
                     # do_classifier_free_guidance=do_classifier_free_guidance,
                     do_classifier_free_guidance=True,
-                    # width=width,
-                    # height=height,
-                    width=512,
-                    height=512,
+                    width=control_width_resize,
+                    height=control_height_resize,
                     # batch_size=batch_size * num_images_per_prompt,
                     # num_images_per_prompt=num_images_per_prompt,
                     device=control_model.device,
@@ -403,8 +403,6 @@ class TextToLatentsInvocation(BaseInvocation):
                 images.append(image_)
             control_image = images
 
-
-        # TODO: Verify the noise is the right size
 
         result_latents, result_attention_map_saver = model.latents_from_embeddings(
             latents=torch.zeros_like(noise, dtype=torch_dtype(model.device)),
diff --git a/invokeai/backend/stable_diffusion/diffusers_pipeline.py b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
index 1fe20e6361..9656fe7eee 100644
--- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py
+++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
@@ -1030,6 +1030,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
         dtype=torch.float16,
         do_classifier_free_guidance=True,
     ):
+        if not isinstance(image, torch.Tensor):
             if isinstance(image, PIL.Image.Image):
                 image = [image]
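
Note (reviewer sketch, not part of the patch): the change replaces the hardcoded 512x512 ControlNet image size with dimensions derived from the noise tensor, relying on Stable Diffusion latents being 8x downsampled from pixel space. A minimal illustration of that sizing rule, assuming an NCHW noise tensor; the helper name `control_image_size` and the example values are hypothetical:

```python
import torch

# VAE downsampling factor for Stable Diffusion: one latent cell covers 8x8 pixels.
LATENT_SCALE_FACTOR = 8

def control_image_size(noise: torch.Tensor) -> tuple[int, int]:
    """Derive (width, height) for the ControlNet conditioning image
    from an NCHW noise/latents tensor, mirroring latents_shape[2] * 8
    and latents_shape[3] * 8 in the patch."""
    _, _, latent_height, latent_width = noise.shape
    return latent_width * LATENT_SCALE_FACTOR, latent_height * LATENT_SCALE_FACTOR

# Example: a 64x96 latent maps to a 768x512 control image (width x height).
noise = torch.zeros(1, 4, 64, 96)
assert control_image_size(noise) == (768, 512)
```

Deriving the size from the noise tensor keeps the conditioning image aligned with non-square generation sizes, which the previous hardcoded 512x512 would have silently broken.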