Initial implementation of IP-Adapter "begin_step_percent" and "end_step_percent" for controlling the range of denoising steps over which the IP-Adapter is applied.

user1 2023-09-16 08:24:12 -07:00
parent 834751e877
commit ced297ed21
4 changed files with 45 additions and 5 deletions
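
In outline, the commit threads the two new fields from the invocation API down to the denoising loop, where the IP-Adapter's attention scale is set to the configured weight inside the chosen step window and to 0.0 outside it. A minimal standalone sketch of that gating (the helper name is illustrative, not part of the codebase; the floor/ceil math mirrors the last hunk below):

    import math

    def adapter_scale_for_step(step_index, total_step_count,
                               begin_step_percent, end_step_percent, weight):
        # Illustrative helper: returns the attention scale for one denoising step.
        first_adapter_step = math.floor(begin_step_percent * total_step_count)
        last_adapter_step = math.ceil(end_step_percent * total_step_count)
        if first_adapter_step <= step_index <= last_adapter_step:
            return weight  # inside the window: apply the adapter at its weight
        return 0.0         # outside the window: a scale of 0 disables the adapter

    # 30 steps, active from 20% to 80%: floor(0.2*30)=6, ceil(0.8*30)=24
    assert adapter_scale_for_step(5, 30, 0.2, 0.8, 1.0) == 0.0
    assert adapter_scale_for_step(6, 30, 0.2, 0.8, 1.0) == 1.0
    assert adapter_scale_for_step(24, 30, 0.2, 0.8, 1.0) == 1.0
    assert adapter_scale_for_step(25, 30, 0.2, 0.8, 1.0) == 0.0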

@@ -34,6 +34,8 @@ class IPAdapterField(BaseModel):
     ip_adapter_model: IPAdapterModelField = Field(description="The IP-Adapter model to use.")
     image_encoder_model: CLIPVisionModelField = Field(description="The name of the CLIP image encoder model.")
     weight: float = Field(default=1.0, ge=0, description="The weight of the IP-Adapter.")
+    begin_step_percent: float = Field(default=0.0, ge=0, le=1.0)
+    end_step_percent: float = Field(default=1.0, ge=0, le=1.0)
 
 
 @invocation_output("ip_adapter_output")
@@ -54,6 +56,12 @@ class IPAdapterInvocation(BaseInvocation):
         input=Input.Direct,
     )
     weight: float = InputField(default=1.0, description="The weight of the IP-Adapter.", ui_type=UIType.Float)
+    begin_step_percent: float = InputField(
+        default=0, ge=-1, le=2, description="When the IP-Adapter is first applied (% of total steps)"
+    )
+    end_step_percent: float = InputField(
+        default=1, ge=0, le=1, description="When the IP-Adapter is last applied (% of total steps)"
+    )
 
     def invoke(self, context: InvocationContext) -> IPAdapterOutput:
         # Lookup the CLIP Vision encoder that is intended to be used with the IP-Adapter model.
@@ -80,5 +88,7 @@ class IPAdapterInvocation(BaseInvocation):
                 ip_adapter_model=self.ip_adapter_model,
                 image_encoder_model=image_encoder_model,
                 weight=self.weight,
+                begin_step_percent=self.begin_step_percent,
+                end_step_percent=self.end_step_percent,
             ),
         )
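
For reference, the ge/le arguments above are pydantic range constraints, so out-of-range values are rejected at validation time rather than silently clamped. A small self-contained sketch with plain pydantic (the model name is a stand-in, not the project's class):

    from pydantic import BaseModel, Field, ValidationError

    class StepRange(BaseModel):  # stand-in mirroring IPAdapterField's new fields
        begin_step_percent: float = Field(default=0.0, ge=0, le=1.0)
        end_step_percent: float = Field(default=1.0, ge=0, le=1.0)

    StepRange(begin_step_percent=0.25, end_step_percent=0.75)  # validates fine
    try:
        StepRange(end_step_percent=1.5)  # violates le=1.0
    except ValidationError as err:
        print(err)  # reports the failed constraint on end_step_percent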

@@ -446,6 +446,8 @@ class DenoiseLatentsInvocation(BaseInvocation):
         return IPAdapterData(
             ip_adapter_model=ip_adapter_model,
             weight=ip_adapter.weight,
+            begin_step_percent=ip_adapter.begin_step_percent,
+            end_step_percent=ip_adapter.end_step_percent,
         )
 
     # original idea by https://github.com/AmericanPresidentJimmyCarter

@@ -131,8 +131,18 @@ class IPAdapter:
         self._attn_processors = attn_procs
         self._state_dict = None
 
+    # @genomancer: pushed scaling back out into its own method (like original Tencent implementation)
+    # which makes implementing begin_step_percent and end_step_percent easier
+    # but based on self._attn_processors (ala @Ryan) instead of original Tencent unet.attn_processors,
+    # which should make it easier to implement multiple IPAdapters
+    def set_scale(self, scale):
+        if self._attn_processors is not None:
+            for attn_processor in self._attn_processors.values():
+                if isinstance(attn_processor, IPAttnProcessor):
+                    attn_processor.scale = scale
+
     @contextmanager
-    def apply_ip_adapter_attention(self, unet: UNet2DConditionModel, scale: int):
+    def apply_ip_adapter_attention(self, unet: UNet2DConditionModel, scale: float):
         """A context manager that patches `unet` with this IP-Adapter's attention processors while it is active.
 
         Yields:
@@ -143,10 +153,11 @@ class IPAdapter:
         # used on any UNet model (with the same dimensions).
         self._prepare_attention_processors(unet)
 
-        # Set scale.
-        for attn_processor in self._attn_processors.values():
-            if isinstance(attn_processor, IPAttnProcessor):
-                attn_processor.scale = scale
+        # Set scale
+        self.set_scale(scale)
+        # for attn_processor in self._attn_processors.values():
+        #     if isinstance(attn_processor, IPAttnProcessor):
+        #         attn_processor.scale = scale
 
         orig_attn_processors = unet.attn_processors
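
Factoring the loop out into set_scale means the scale can be changed while the processors are already patched into the UNet, which is exactly what per-step gating needs. A toy, runnable illustration of the pattern (these classes are stand-ins, not the project's; the real set_scale also filters on IPAttnProcessor as shown above):

    class ToyAttnProcessor:  # stand-in for IPAttnProcessor
        def __init__(self):
            self.scale = 1.0

    class ToyIPAdapter:  # stand-in for IPAdapter
        def __init__(self):
            self._attn_processors = {"down_blocks.0": ToyAttnProcessor()}

        def set_scale(self, scale):
            # same shape as the diff: update every patched attention processor
            for proc in self._attn_processors.values():
                proc.scale = scale

    adapter = ToyIPAdapter()
    adapter.set_scale(0.0)  # e.g. disable the adapter outside the step window
    print(adapter._attn_processors["down_blocks.0"].scale)  # 0.0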

@@ -2,6 +2,7 @@ from __future__ import annotations
 
 from contextlib import nullcontext
 from dataclasses import dataclass
+import math
 from typing import Any, Callable, List, Optional, Union
 
 import einops
@@ -168,6 +169,8 @@ class IPAdapterData:
     # TODO: change to polymorphic so can do different weights per step (once implemented...)
     # weight: Union[float, List[float]] = Field(default=1.0)
     weight: float = Field(default=1.0)
+    begin_step_percent: float = Field(default=0.0)
+    end_step_percent: float = Field(default=1.0)
 
 
 @dataclass
@@ -445,6 +448,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
                 total_step_count=len(timesteps),
                 additional_guidance=additional_guidance,
                 control_data=control_data,
+                ip_adapter_data=ip_adapter_data,
             )
 
             latents = step_output.prev_sample
@@ -490,6 +494,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
         total_step_count: int,
         additional_guidance: List[Callable] = None,
         control_data: List[ControlNetData] = None,
+        ip_adapter_data: Optional[IPAdapterData] = None,
     ):
         # invokeai_diffuser has batched timesteps, but diffusers schedulers expect a single value
         timestep = t[0]
@@ -500,6 +505,18 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
         # i.e. before or after passing it to InvokeAIDiffuserComponent
         latent_model_input = self.scheduler.scale_model_input(latents, timestep)
 
+        # handle IP-Adapter
+        if ip_adapter_data is not None:
+            first_adapter_step = math.floor(ip_adapter_data.begin_step_percent * total_step_count)
+            last_adapter_step = math.ceil(ip_adapter_data.end_step_percent * total_step_count)
+            if step_index >= first_adapter_step and step_index <= last_adapter_step:
+                # only apply IP-Adapter if current step is within the IP-Adapter's begin/end step range
+                ip_adapter_data.ip_adapter_model.set_scale(ip_adapter_data.weight)
+            else:
+                # otherwise, set IP-Adapter scale to 0, so it has no effect
+                ip_adapter_data.ip_adapter_model.set_scale(0.0)
+
         # handle ControlNet(s)
         # default is no controlnet, so set controlnet processing output to None
         controlnet_down_block_samples, controlnet_mid_block_sample = None, None
         if control_data is not None:
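
To make the boundary math concrete (numbers chosen for illustration): with total_step_count = 20, begin_step_percent = 0.25 and end_step_percent = 0.75 give first_adapter_step = floor(5.0) = 5 and last_adapter_step = ceil(15.0) = 15, so set_scale(weight) runs on steps 5 through 15 inclusive and set_scale(0.0) on the rest. With the defaults (0.0 and 1.0), floor(0) = 0 and ceil(total_step_count) is past the final step index, so the adapter stays active for the whole denoising loop.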