Add denoising_end param to FluxDenoiseInvocation.

2024-08-30 20:32:17 +00:00 · 2024-08-30 19:13:20 +00:00 · 2024-08-30 19:13:20 +00:00 · 6675aaba4c
commit 6675aaba4c
parent 661c9db7ac
3 changed files with 90 additions and 5 deletions
--- a/invokeai/app/invocations/flux_text_to_image.py
+++ b/invokeai/app/invocations/flux_text_to_image.py
@ -23,6 +23,7 @@ from invokeai.backend.flux.denoise import denoise
 from invokeai.backend.flux.inpaint_extension import InpaintExtension
 from invokeai.backend.flux.model import Flux
 from invokeai.backend.flux.sampling_utils import (
+    clip_timestep_schedule,
    generate_img_ids,
    get_noise,
    get_schedule,
@ -62,6 +63,7 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
        le=1,
        description=FieldDescriptions.denoising_start,
    )
+    denoising_end: float = InputField(default=1.0, ge=0, le=1, description=FieldDescriptions.denoising_end)
    transformer: TransformerField = InputField(
        description=FieldDescriptions.flux_model,
        input=Input.Connection,
@ -130,6 +132,9 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
            shift=not is_schnell,
        )

+        # Clip the timesteps schedule based on denoising_start and denoising_end.
+        timesteps = clip_timestep_schedule(timesteps, self.denoising_start, self.denoising_end)
+
        # Prepare input latent image.
        if init_latents is not None:
            # If init_latents is provided, we are doing image-to-image.
@ -140,11 +145,6 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
                    "to be poor. Consider using a FLUX dev model instead."
                )

-            # Clip the timesteps schedule based on denoising_start.
-            # TODO(ryand): Should we apply denoising_start in timestep-space rather than timestep-index-space?
-            start_idx = int(self.denoising_start * len(timesteps))
-            timesteps = timesteps[start_idx:]
-
            # Noise the orig_latents by the appropriate amount for the first timestep.
            t_0 = timesteps[0]
            x = t_0 * noise + (1.0 - t_0) * init_latents
@ -155,6 +155,11 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):

            x = noise

+        # If len(timesteps) <= 1, then short-circuit. We are just noising the input latents, but not taking any
+        # denoising steps.
+        if len(timesteps) <= 1:
+            return x
+
        inpaint_mask = self._prep_inpaint_mask(context, x)

        b, _c, h, w = x.shape
--- a/invokeai/backend/flux/sampling_utils.py
+++ b/invokeai/backend/flux/sampling_utils.py
@ -59,6 +59,44 @@ def get_schedule(
    return timesteps.tolist()


+def _find_last_index_ge_val(timesteps: list[float], val: float, eps: float = 1e-6) -> int:
+    """Find the last index in timesteps that is >= val.
+
+    We use epsilon-close equality to avoid potential floating point errors.
+    """
+    idx = len(list(filter(lambda t: t >= (val - eps), timesteps))) - 1
+    assert idx >= 0
+    return idx
+
+
+def clip_timestep_schedule(timesteps: list[float], denoising_start: float, denoising_end: float) -> list[float]:
+    """Clip the timestep schedule to the denoising range.
+
+    Args:
+        timesteps (list[float]): The original timestep schedule: [1.0, ..., 0.0].
+        denoising_start (float): A value in [0, 1] specifying the start of the denoising process. E.g. a value of 0.2
+            would mean that the denoising process starts at the last timestep in the schedule >= 0.8.
+        denoising_end (float): A value in [0, 1] specifying the end of the denoising process. E.g. a value of 0.8 would
+            mean that the denoising process ends at the last timestep in the schedule >= 0.2.
+
+    Returns:
+        list[float]: The clipped timestep schedule.
+    """
+    assert 0.0 <= denoising_start <= 1.0
+    assert 0.0 <= denoising_end <= 1.0
+    assert denoising_start <= denoising_end
+
+    t_start_val = 1.0 - denoising_start
+    t_end_val = 1.0 - denoising_end
+
+    t_start_idx = _find_last_index_ge_val(timesteps, t_start_val)
+    t_end_idx = _find_last_index_ge_val(timesteps, t_end_val)
+
+    clipped_timesteps = timesteps[t_start_idx : t_end_idx + 1]
+
+    return clipped_timesteps
+
+
 def unpack(x: torch.Tensor, height: int, width: int) -> torch.Tensor:
    """Unpack flat array of patch embeddings to latent image."""
    return rearrange(
--- a/tests/backend/flux/test_sampling_utils.py
+++ b/tests/backend/flux/test_sampling_utils.py
@ -0,0 +1,42 @@
+import pytest
+import torch
+
+from invokeai.backend.flux.sampling_utils import clip_timestep_schedule
+
+
+def float_lists_almost_equal(list1: list[float], list2: list[float], tol: float = 1e-6) -> bool:
+    return all(abs(a - b) < tol for a, b in zip(list1, list2, strict=True))
+
+
+@pytest.mark.parametrize(
+    ["denoising_start", "denoising_end", "expected_timesteps", "raises"],
+    [
+        (0.0, 1.0, [1.0, 0.75, 0.5, 0.25, 0.0], False),  # Default case.
+        (-0.1, 1.0, [], True),  # Negative denoising_start should raise.
+        (0.0, 1.1, [], True),  # denoising_end > 1 should raise.
+        (0.5, 0.0, [], True),  # denoising_start > denoising_end should raise.
+        (0.0, 0.0, [1.0], False),  # denoising_end == 0.
+        (1.0, 1.0, [0.0], False),  # denoising_start == 1.
+        (0.2, 0.8, [1.0, 0.75, 0.5, 0.25], False),  # Middle of the schedule.
+        # If we denoise from 0.0 to x, then from x to 1.0, it is important that denoising_end = x and
+        # denoising_start = x map to the same timestep. We test this first when x is equal to a timestep, then when it
+        # falls between two timesteps.
+        # x = 0.5
+        (0.0, 0.5, [1.0, 0.75, 0.5], False),
+        (0.5, 1.0, [0.5, 0.25, 0.0], False),
+        # x = 0.3
+        (0.0, 0.3, [1.0, 0.75], False),
+        (0.3, 1.0, [0.75, 0.5, 0.25, 0.0], False),
+    ],
+)
+def test_clip_timestep_schedule(
+    denoising_start: float, denoising_end: float, expected_timesteps: list[float], raises: bool
+):
+    timesteps = torch.linspace(1, 0, 5).tolist()
+    if raises:
+        with pytest.raises(AssertionError):
+            clip_timestep_schedule(timesteps, denoising_start, denoising_end)
+    else:
+        assert float_lists_almost_equal(
+            clip_timestep_schedule(timesteps, denoising_start, denoising_end), expected_timesteps
+        )