Initial implementation of IP-Adapter "begin_step_percent" and "end_step_percent" for controlling on which steps IP-Adapter is applied in the denoising loop.

This commit is contained in:
user1 2023-09-16 08:24:12 -07:00
parent 834751e877
commit ced297ed21
4 changed files with 45 additions and 5 deletions

View File

@ -34,6 +34,8 @@ class IPAdapterField(BaseModel):
ip_adapter_model: IPAdapterModelField = Field(description="The IP-Adapter model to use.") ip_adapter_model: IPAdapterModelField = Field(description="The IP-Adapter model to use.")
image_encoder_model: CLIPVisionModelField = Field(description="The name of the CLIP image encoder model.") image_encoder_model: CLIPVisionModelField = Field(description="The name of the CLIP image encoder model.")
weight: float = Field(default=1.0, ge=0, description="The weight of the IP-Adapter.") weight: float = Field(default=1.0, ge=0, description="The weight of the IP-Adapter.")
begin_step_percent: float = Field(default=0.0, ge=0, le=1.0)
end_step_percent: float = Field(default=1.0, ge=0, le=1.0)
@invocation_output("ip_adapter_output") @invocation_output("ip_adapter_output")
@ -54,6 +56,12 @@ class IPAdapterInvocation(BaseInvocation):
input=Input.Direct, input=Input.Direct,
) )
weight: float = InputField(default=1.0, description="The weight of the IP-Adapter.", ui_type=UIType.Float) weight: float = InputField(default=1.0, description="The weight of the IP-Adapter.", ui_type=UIType.Float)
begin_step_percent: float = InputField(
default=0, ge=-1, le=2, description="When the IP-Adapter is first applied (% of total steps)"
)
end_step_percent: float = InputField(
default=1, ge=0, le=1, description="When the IP-Adapter is last applied (% of total steps)"
)
def invoke(self, context: InvocationContext) -> IPAdapterOutput: def invoke(self, context: InvocationContext) -> IPAdapterOutput:
# Lookup the CLIP Vision encoder that is intended to be used with the IP-Adapter model. # Lookup the CLIP Vision encoder that is intended to be used with the IP-Adapter model.
@ -80,5 +88,7 @@ class IPAdapterInvocation(BaseInvocation):
ip_adapter_model=self.ip_adapter_model, ip_adapter_model=self.ip_adapter_model,
image_encoder_model=image_encoder_model, image_encoder_model=image_encoder_model,
weight=self.weight, weight=self.weight,
begin_step_percent=self.begin_step_percent,
end_step_percent=self.end_step_percent,
), ),
) )

View File

@ -446,6 +446,8 @@ class DenoiseLatentsInvocation(BaseInvocation):
return IPAdapterData( return IPAdapterData(
ip_adapter_model=ip_adapter_model, ip_adapter_model=ip_adapter_model,
weight=ip_adapter.weight, weight=ip_adapter.weight,
begin_step_percent=ip_adapter.begin_step_percent,
end_step_percent=ip_adapter.end_step_percent,
) )
# original idea by https://github.com/AmericanPresidentJimmyCarter # original idea by https://github.com/AmericanPresidentJimmyCarter

View File

@ -131,8 +131,18 @@ class IPAdapter:
self._attn_processors = attn_procs self._attn_processors = attn_procs
self._state_dict = None self._state_dict = None
# @genomancer: pushed scaling back out into its own method (like the original Tencent implementation),
# which makes implementing begin_step_percent and end_step_percent easier,
# but based it on self._attn_processors (à la @Ryan) instead of the original Tencent unet.attn_processors,
# which should make it easier to support multiple IP-Adapters.
def set_scale(self, scale):
if self._attn_processors is not None:
for attn_processor in self._attn_processors.values():
if isinstance(attn_processor, IPAttnProcessor):
attn_processor.scale = scale
@contextmanager @contextmanager
def apply_ip_adapter_attention(self, unet: UNet2DConditionModel, scale: int): def apply_ip_adapter_attention(self, unet: UNet2DConditionModel, scale: float):
"""A context manager that patches `unet` with this IP-Adapter's attention processors while it is active. """A context manager that patches `unet` with this IP-Adapter's attention processors while it is active.
Yields: Yields:
@ -143,10 +153,11 @@ class IPAdapter:
# used on any UNet model (with the same dimensions). # used on any UNet model (with the same dimensions).
self._prepare_attention_processors(unet) self._prepare_attention_processors(unet)
# Set scale. # Set scale
for attn_processor in self._attn_processors.values(): self.set_scale(scale)
if isinstance(attn_processor, IPAttnProcessor): # for attn_processor in self._attn_processors.values():
attn_processor.scale = scale # if isinstance(attn_processor, IPAttnProcessor):
# attn_processor.scale = scale
orig_attn_processors = unet.attn_processors orig_attn_processors = unet.attn_processors

View File

@ -2,6 +2,7 @@ from __future__ import annotations
from contextlib import nullcontext from contextlib import nullcontext
from dataclasses import dataclass from dataclasses import dataclass
import math
from typing import Any, Callable, List, Optional, Union from typing import Any, Callable, List, Optional, Union
import einops import einops
@ -168,6 +169,8 @@ class IPAdapterData:
# TODO: change to polymorphic so can do different weights per step (once implemented...) # TODO: change to polymorphic so can do different weights per step (once implemented...)
# weight: Union[float, List[float]] = Field(default=1.0) # weight: Union[float, List[float]] = Field(default=1.0)
weight: float = Field(default=1.0) weight: float = Field(default=1.0)
begin_step_percent: float = Field(default=0.0)
end_step_percent: float = Field(default=1.0)
@dataclass @dataclass
@ -445,6 +448,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
total_step_count=len(timesteps), total_step_count=len(timesteps),
additional_guidance=additional_guidance, additional_guidance=additional_guidance,
control_data=control_data, control_data=control_data,
ip_adapter_data=ip_adapter_data,
) )
latents = step_output.prev_sample latents = step_output.prev_sample
@ -490,6 +494,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
total_step_count: int, total_step_count: int,
additional_guidance: List[Callable] = None, additional_guidance: List[Callable] = None,
control_data: List[ControlNetData] = None, control_data: List[ControlNetData] = None,
ip_adapter_data: Optional[IPAdapterData] = None,
): ):
# invokeai_diffuser has batched timesteps, but diffusers schedulers expect a single value # invokeai_diffuser has batched timesteps, but diffusers schedulers expect a single value
timestep = t[0] timestep = t[0]
@ -500,6 +505,18 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
# i.e. before or after passing it to InvokeAIDiffuserComponent # i.e. before or after passing it to InvokeAIDiffuserComponent
latent_model_input = self.scheduler.scale_model_input(latents, timestep) latent_model_input = self.scheduler.scale_model_input(latents, timestep)
# handle IP-Adapter
if ip_adapter_data is not None:
first_adapter_step = math.floor(ip_adapter_data.begin_step_percent * total_step_count)
last_adapter_step = math.ceil(ip_adapter_data.end_step_percent * total_step_count)
if step_index >= first_adapter_step and step_index <= last_adapter_step:
# only apply IP-Adapter if current step is within the IP-Adapter's begin/end step range
ip_adapter_data.ip_adapter_model.set_scale(ip_adapter_data.weight)
else:
# otherwise, set IP-Adapter scale to 0, so it has no effect
ip_adapter_data.ip_adapter_model.set_scale(0.0)
# handle ControlNet(s)
# default is no controlnet, so set controlnet processing output to None # default is no controlnet, so set controlnet processing output to None
controlnet_down_block_samples, controlnet_mid_block_sample = None, None controlnet_down_block_samples, controlnet_mid_block_sample = None, None
if control_data is not None: if control_data is not None: