Merge branch 'main' into lstein/new-model-manager

Author: Lincoln Stein
Date: 2023-05-13 22:01:34 -04:00
Committed by: GitHub
79 changed files with 717 additions and 550 deletions

View File

@@ -51,7 +51,7 @@ class TextToImageInvocation(BaseInvocation, SDImageInvocation):
width: int = Field(default=512, multiple_of=8, gt=0, description="The width of the resulting image", )
height: int = Field(default=512, multiple_of=8, gt=0, description="The height of the resulting image", )
cfg_scale: float = Field(default=7.5, ge=1, description="The Classifier-Free Guidance, higher values may result in a result closer to the prompt", )
scheduler: SAMPLER_NAME_VALUES = Field(default="k_lms", description="The scheduler to use" )
scheduler: SAMPLER_NAME_VALUES = Field(default="lms", description="The scheduler to use" )
model: str = Field(default="", description="The model to use (currently ignored)")
# fmt: on
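Since SAMPLER_NAME_VALUES is now derived from the keys of SCHEDULER_MAP (see the latents module further down), the old "k_" prefixed names are no longer valid Literal members, so the default moves from "k_lms" to "lms". A minimal sketch of the constraint; the subset of names shown is illustrative, not the full map:

from typing import Literal, get_args
SAMPLER_NAME_VALUES = Literal["ddim", "lms", "euler"]   # illustrative subset of SCHEDULER_MAP keys
assert "lms" in get_args(SAMPLER_NAME_VALUES)           # new default is accepted
assert "k_lms" not in get_args(SAMPLER_NAME_VALUES)     # old default would now fail validation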

View File

@@ -33,8 +33,8 @@ class ImageOutput(BaseInvocationOutput):
# fmt: off
type: Literal["image"] = "image"
image: ImageField = Field(default=None, description="The output image")
width: Optional[int] = Field(default=None, description="The width of the image in pixels")
height: Optional[int] = Field(default=None, description="The height of the image in pixels")
width: int = Field(description="The width of the image in pixels")
height: int = Field(description="The height of the image in pixels")
# fmt: on
class Config:
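With width and height promoted from Optional to required fields, anything that constructs an ImageOutput must now pass both explicitly; a hedged sketch of a caller after this change, where the field values and the ImageField arguments are illustrative rather than taken from a specific node:

image_output = ImageOutput(
    image=ImageField(image_type=image_type, image_name=image_name),
    width=image.width,    # now required; omitting it fails validation
    height=image.height,  # now required
)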

View File

@@ -14,6 +14,7 @@ from invokeai.app.util.misc import SEED_MAX, get_random_seed
from invokeai.app.util.step_callback import stable_diffusion_step_callback
from ...backend.image_util.seamless import configure_model_padding
from ...backend.stable_diffusion import PipelineIntermediateState
from ...backend.stable_diffusion.diffusers_pipeline import (
ConditioningData, StableDiffusionGeneratorPipeline,
@@ -21,6 +22,10 @@ from ...backend.stable_diffusion.diffusers_pipeline import (
from ...backend.stable_diffusion.diffusion.shared_invokeai_diffusion import \
PostprocessingSettings
from ...backend.util.devices import choose_torch_device, torch_dtype
from ...backend.prompting.conditioning import get_uc_and_c_and_ec
from ...backend.stable_diffusion.schedulers import SCHEDULER_MAP
from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationContext, InvocationConfig
import numpy as np
from ..services.image_storage import ImageType
from .baseinvocation import (BaseInvocation, BaseInvocationOutput,
InvocationConfig, InvocationContext)
@@ -42,48 +47,59 @@ class LatentsField(BaseModel):
class LatentsOutput(BaseInvocationOutput):
"""Base class for invocations that output latents"""
#fmt: off
type: Literal["latent_output"] = "latent_output"
latents: LatentsField = Field(default=None, description="The output latents")
type: Literal["latents_output"] = "latents_output"
# Inputs
latents: LatentsField = Field(default=None, description="The output latents")
width: int = Field(description="The width of the latents in pixels")
height: int = Field(description="The height of the latents in pixels")
#fmt: on
def build_latents_output(latents_name: str, latents: torch.Tensor):
return LatentsOutput(
latents=LatentsField(latents_name=latents_name),
width=latents.size()[3] * 8,
height=latents.size()[2] * 8,
)
class NoiseOutput(BaseInvocationOutput):
"""Invocation noise output"""
#fmt: off
type: Literal["noise_output"] = "noise_output"
type: Literal["noise_output"] = "noise_output"
# Inputs
noise: LatentsField = Field(default=None, description="The output noise")
width: int = Field(description="The width of the noise in pixels")
height: int = Field(description="The height of the noise in pixels")
#fmt: on
# TODO: this seems like a hack
scheduler_map = dict(
ddim=diffusers.DDIMScheduler,
dpmpp_2=diffusers.DPMSolverMultistepScheduler,
k_dpm_2=diffusers.KDPM2DiscreteScheduler,
k_dpm_2_a=diffusers.KDPM2AncestralDiscreteScheduler,
k_dpmpp_2=diffusers.DPMSolverMultistepScheduler,
k_euler=diffusers.EulerDiscreteScheduler,
k_euler_a=diffusers.EulerAncestralDiscreteScheduler,
k_heun=diffusers.HeunDiscreteScheduler,
k_lms=diffusers.LMSDiscreteScheduler,
plms=diffusers.PNDMScheduler,
)
def build_noise_output(latents_name: str, latents: torch.Tensor):
return NoiseOutput(
noise=LatentsField(latents_name=latents_name),
width=latents.size()[3] * 8,
height=latents.size()[2] * 8,
)
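The width and height on these outputs come straight from the latent tensor: Stable Diffusion latents are spatially downsampled by a factor of 8, so multiplying the last two tensor dimensions by 8 recovers the pixel size. A quick worked example, with an illustrative tensor shape:

import torch
noise = torch.randn(1, 4, 64, 64)                    # latents for a 512x512 image
out = build_noise_output(latents_name="noise", latents=noise)
assert (out.width, out.height) == (64 * 8, 64 * 8)   # size()[3] * 8 and size()[2] * 8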
SAMPLER_NAME_VALUES = Literal[
tuple(list(scheduler_map.keys()))
tuple(list(SCHEDULER_MAP.keys()))
]
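get_scheduler below unpacks each SCHEDULER_MAP entry into a scheduler class plus an extra-config dict, so the imported mapping presumably has roughly this shape. This is an inferred sketch, not the actual definition in invokeai.backend.stable_diffusion.schedulers:

SCHEDULER_MAP = dict(
    ddim=(diffusers.DDIMScheduler, dict()),
    lms=(diffusers.LMSDiscreteScheduler, dict()),
    euler=(diffusers.EulerDiscreteScheduler, dict()),
)  # hypothetical entries; the real map lives in the backend schedulers module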
def get_scheduler(
context: InvocationContext,
scheduler_info: ModelInfo,
scheduler_name: str,
) -> Scheduler:
scheduler_class, scheduler_extra_config = SCHEDULER_MAP.get(scheduler_name, SCHEDULER_MAP['ddim'])
orig_scheduler_info = context.services.model_manager.get_model(**scheduler_info.dict())
with orig_scheduler_info as orig_scheduler:
scheduler_config = orig_scheduler.config
scheduler_class = scheduler_map.get(scheduler_name,'ddim')
if "_backup" in scheduler_config:
scheduler_config = scheduler_config["_backup"]
scheduler_config = {**scheduler_config, **scheduler_extra_config, "_backup": scheduler_config}
scheduler = scheduler_class.from_config(scheduler_config)
# hack copied over from generate.py
if not hasattr(scheduler, 'uses_inpainting_model'):
@@ -139,9 +155,7 @@ class NoiseInvocation(BaseInvocation):
name = f'{context.graph_execution_state_id}__{self.id}'
context.services.latents.set(name, noise)
return NoiseOutput(
noise=LatentsField(latents_name=name)
)
return build_noise_output(latents_name=name, latents=noise)
# Text to image
@@ -157,7 +171,8 @@ class TextToLatentsInvocation(BaseInvocation):
noise: Optional[LatentsField] = Field(description="The noise to use")
steps: int = Field(default=10, gt=0, description="The number of steps to use to generate the image")
cfg_scale: float = Field(default=7.5, gt=0, description="The Classifier-Free Guidance, higher values may result in a result closer to the prompt", )
scheduler: SAMPLER_NAME_VALUES = Field(default="k_lms", description="The scheduler to use" )
scheduler: SAMPLER_NAME_VALUES = Field(default="lms", description="The scheduler to use" )
model: str = Field(default="", description="The model to use (currently ignored)")
seamless: bool = Field(default=False, description="Whether or not to generate an image that can tile without seams", )
seamless_axes: str = Field(default="", description="The axes to tile the image on, 'x' and/or 'y'")
@@ -264,9 +279,7 @@ class TextToLatentsInvocation(BaseInvocation):
name = f'{context.graph_execution_state_id}__{self.id}'
context.services.latents.set(name, result_latents)
return LatentsOutput(
latents=LatentsField(latents_name=name)
)
return build_latents_output(latents_name=name, latents=result_latents)
class LatentsToLatentsInvocation(TextToLatentsInvocation):
@@ -337,9 +350,7 @@ class LatentsToLatentsInvocation(TextToLatentsInvocation):
name = f'{context.graph_execution_state_id}__{self.id}'
context.services.latents.set(name, result_latents)
return LatentsOutput(
latents=LatentsField(latents_name=name)
)
return build_latents_output(latents_name=name, latents=result_latents)
# Latent to image
@@ -417,11 +428,11 @@ class ResizeLatentsInvocation(BaseInvocation):
type: Literal["lresize"] = "lresize"
# Inputs
latents: Optional[LatentsField] = Field(description="The latents to resize")
width: int = Field(ge=64, multiple_of=8, description="The width to resize to (px)")
height: int = Field(ge=64, multiple_of=8, description="The height to resize to (px)")
mode: Optional[LATENTS_INTERPOLATION_MODE] = Field(default="bilinear", description="The interpolation mode")
antialias: Optional[bool] = Field(default=False, description="Whether or not to antialias (applied in bilinear and bicubic modes only)")
latents: Optional[LatentsField] = Field(description="The latents to resize")
width: int = Field(ge=64, multiple_of=8, description="The width to resize to (px)")
height: int = Field(ge=64, multiple_of=8, description="The height to resize to (px)")
mode: LATENTS_INTERPOLATION_MODE = Field(default="bilinear", description="The interpolation mode")
antialias: bool = Field(default=False, description="Whether or not to antialias (applied in bilinear and bicubic modes only)")
def invoke(self, context: InvocationContext) -> LatentsOutput:
latents = context.services.latents.get(self.latents.latents_name)
@@ -438,7 +449,7 @@ class ResizeLatentsInvocation(BaseInvocation):
name = f"{context.graph_execution_state_id}__{self.id}"
context.services.latents.set(name, resized_latents)
return LatentsOutput(latents=LatentsField(latents_name=name))
return build_latents_output(latents_name=name, latents=resized_latents)
class ScaleLatentsInvocation(BaseInvocation):
@@ -447,10 +458,10 @@ class ScaleLatentsInvocation(BaseInvocation):
type: Literal["lscale"] = "lscale"
# Inputs
latents: Optional[LatentsField] = Field(description="The latents to scale")
scale_factor: float = Field(gt=0, description="The factor by which to scale the latents")
mode: Optional[LATENTS_INTERPOLATION_MODE] = Field(default="bilinear", description="The interpolation mode")
antialias: Optional[bool] = Field(default=False, description="Whether or not to antialias (applied in bilinear and bicubic modes only)")
latents: Optional[LatentsField] = Field(description="The latents to scale")
scale_factor: float = Field(gt=0, description="The factor by which to scale the latents")
mode: LATENTS_INTERPOLATION_MODE = Field(default="bilinear", description="The interpolation mode")
antialias: bool = Field(default=False, description="Whether or not to antialias (applied in bilinear and bicubic modes only)")
def invoke(self, context: InvocationContext) -> LatentsOutput:
latents = context.services.latents.get(self.latents.latents_name)
@@ -468,7 +479,7 @@ class ScaleLatentsInvocation(BaseInvocation):
name = f"{context.graph_execution_state_id}__{self.id}"
context.services.latents.set(name, resized_latents)
return LatentsOutput(latents=LatentsField(latents_name=name))
return build_latents_output(latents_name=name, latents=resized_latents)
class ImageToLatentsInvocation(BaseInvocation):
@@ -522,4 +533,4 @@ class ImageToLatentsInvocation(BaseInvocation):
name = f"{context.graph_execution_state_id}__{self.id}"
context.services.latents.set(name, latents)
return LatentsOutput(latents=LatentsField(latents_name=name))
return build_latents_output(latents_name=name, latents=latents)
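One practical effect of routing every latents-producing invocation through build_latents_output is that LatentsOutput and NoiseOutput now carry the eventual pixel dimensions, so downstream nodes can be sized without decoding the latents first. A tiny hedged usage sketch:

latents_out = build_latents_output(latents_name=name, latents=result_latents)
# Both values are plain ints in pixel units (latent dims * 8):
target_size = (latents_out.width, latents_out.height)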

View File

@@ -3,6 +3,7 @@
from typing import Literal
from pydantic import BaseModel, Field
import numpy as np
from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationContext, InvocationConfig
@@ -73,3 +74,12 @@ class DivideInvocation(BaseInvocation, MathInvocationConfig):
def invoke(self, context: InvocationContext) -> IntOutput:
return IntOutput(a=int(self.a / self.b))
class RandomIntInvocation(BaseInvocation):
"""Outputs a single random integer."""
#fmt: off
type: Literal["rand_int"] = "rand_int"
#fmt: on
def invoke(self, context: InvocationContext) -> IntOutput:
return IntOutput(a=np.random.randint(0, np.iinfo(np.int32).max))
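np.random.randint treats its high bound as exclusive, so the new rand_int node yields a uniform integer in [0, 2**31 - 2], staying within the int32 range used elsewhere (for example as seeds). A quick check of that range:

import numpy as np
high = np.iinfo(np.int32).max          # 2147483647
value = np.random.randint(0, high)     # uniform over 0 .. high - 1
assert 0 <= value < high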

View File

@@ -48,13 +48,14 @@ def create_text_to_image() -> LibraryGraph:
def create_system_graphs(graph_library: ItemStorageABC[LibraryGraph]) -> list[LibraryGraph]:
"""Creates the default system graphs, or adds new versions if the old ones don't match"""
# TODO: Uncomment this when we are ready to fix this up to prevent breaking changes
graphs: list[LibraryGraph] = list()
# text_to_image = graph_library.get(default_text_to_image_graph_id)
# TODO: Check if the graph is the same as the default one, and if not, update it
#if text_to_image is None:
# # TODO: Check if the graph is the same as the default one, and if not, update it
# #if text_to_image is None:
text_to_image = create_text_to_image()
graph_library.set(text_to_image)

View File

@@ -1,3 +1,4 @@
import time
import traceback
from threading import Event, Thread, BoundedSemaphore
@@ -6,6 +7,7 @@ from .invocation_queue import InvocationQueueItem
from .invoker import InvocationProcessorABC, Invoker
from ..models.exceptions import CanceledException
import invokeai.backend.util.logging as logger
class DefaultInvocationProcessor(InvocationProcessorABC):
__invoker_thread: Thread
__stop_event: Event
@@ -34,8 +36,14 @@ class DefaultInvocationProcessor(InvocationProcessorABC):
try:
self.__threadLimit.acquire()
while not stop_event.is_set():
queue_item: InvocationQueueItem = self.__invoker.services.queue.get()
try:
queue_item: InvocationQueueItem = self.__invoker.services.queue.get()
except Exception as e:
logger.debug("Exception while getting from queue: %s" % e)
if not queue_item: # Probably stopping
# do not hammer the queue
time.sleep(0.5)
continue
graph_execution_state = (
@@ -124,7 +132,16 @@ class DefaultInvocationProcessor(InvocationProcessorABC):
# Queue any further commands if invoking all
is_complete = graph_execution_state.is_complete()
if queue_item.invoke_all and not is_complete:
self.__invoker.invoke(graph_execution_state, invoke_all=True)
try:
self.__invoker.invoke(graph_execution_state, invoke_all=True)
except Exception as e:
logger.error("Error while invoking: %s" % e)
self.__invoker.services.events.emit_invocation_error(
graph_execution_state_id=graph_execution_state.id,
node=invocation.dict(),
source_node_id=source_node_id,
error=traceback.format_exc()
)
elif is_complete:
self.__invoker.services.events.emit_graph_execution_complete(
graph_execution_state.id