diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py
index 6d33319654..4e5b97919f 100644
--- a/invokeai/app/invocations/latent.py
+++ b/invokeai/app/invocations/latent.py
@@ -328,7 +328,6 @@ class TextToLatentsInvocation(BaseInvocation):
             latents_shape=noise.shape,
             do_classifier_free_guidance=(self.cfg_scale >= 1.0))
 
-        # TODO: Verify the noise is the right size
         result_latents, result_attention_map_saver = model.latents_from_embeddings(
             latents=torch.zeros_like(noise, dtype=torch_dtype(model.device)),
diff --git a/invokeai/backend/stable_diffusion/diffusers_pipeline.py b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
index 41146c8004..ec2902e4d6 100644
--- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py
+++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
@@ -1031,6 +1031,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
         dtype=torch.float16,
         do_classifier_free_guidance=True,
     ):
+        if not isinstance(image, torch.Tensor):
             if isinstance(image, PIL.Image.Image):
                 image = [image]
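
The hunk in diffusers_pipeline.py wraps the existing PIL-conversion logic in a tensor check, so a caller can pass an already-prepared tensor and skip the conversion path. Below is a minimal, self-contained sketch of that pattern; the helper name to_batched_tensor and the conversion details are illustrative assumptions, not the library's actual prepare_control_image implementation.

# Hypothetical sketch (not InvokeAI's actual code): shows the effect of the
# added guard -- a tensor passed in is used as-is, while PIL images still go
# through the conversion path.
import numpy as np
import PIL.Image
import torch


def to_batched_tensor(image, dtype=torch.float16):
    # New guard: skip PIL-to-tensor conversion when a tensor is already supplied.
    if not isinstance(image, torch.Tensor):
        if isinstance(image, PIL.Image.Image):
            image = [image]
        # Convert each PIL image to a CHW float tensor in [0, 1] and batch them.
        image = torch.stack(
            [
                torch.from_numpy(np.asarray(im, dtype=np.float32) / 255.0).permute(2, 0, 1)
                for im in image
            ]
        )
    return image.to(dtype=dtype)


# Both call paths yield a (batch, channels, height, width) tensor.
from_pil = to_batched_tensor(PIL.Image.new("RGB", (64, 64)))
from_tensor = to_batched_tensor(torch.rand(1, 3, 64, 64))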