Mirror of https://github.com/invoke-ai/InvokeAI
Initial implementation of IP-Adapter "begin_step_percent" and "end_step_percent" for controlling which steps of the denoising loop the IP-Adapter is applied to.
commit ced297ed21 (parent 834751e877)
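The mechanics, in brief: the two new percentages are converted to an inclusive range of denoising-step indices, and on each step the IP-Adapter's attention scale is set to its weight inside that range and to 0.0 outside it. A minimal standalone sketch of that mapping (the function name is illustrative, not part of this commit):

    import math

    def ip_adapter_scale_for_step(
        step_index: int,
        total_step_count: int,
        begin_step_percent: float,
        end_step_percent: float,
        weight: float,
    ) -> float:
        # Inclusive active range, mirroring the floor/ceil logic added below.
        first_adapter_step = math.floor(begin_step_percent * total_step_count)
        last_adapter_step = math.ceil(end_step_percent * total_step_count)
        if first_adapter_step <= step_index <= last_adapter_step:
            return weight  # adapter active on this step
        return 0.0  # adapter disabled on this step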
@@ -34,6 +34,8 @@ class IPAdapterField(BaseModel):
     ip_adapter_model: IPAdapterModelField = Field(description="The IP-Adapter model to use.")
     image_encoder_model: CLIPVisionModelField = Field(description="The name of the CLIP image encoder model.")
     weight: float = Field(default=1.0, ge=0, description="The weight of the IP-Adapter.")
+    begin_step_percent: float = Field(default=0.0, ge=0, le=1.0)
+    end_step_percent: float = Field(default=1.0, ge=0, le=1.0)


 @invocation_output("ip_adapter_output")
@@ -54,6 +56,12 @@ class IPAdapterInvocation(BaseInvocation):
         input=Input.Direct,
     )
     weight: float = InputField(default=1.0, description="The weight of the IP-Adapter.", ui_type=UIType.Float)
+    begin_step_percent: float = InputField(
+        default=0, ge=-1, le=2, description="When the IP-Adapter is first applied (% of total steps)"
+    )
+    end_step_percent: float = InputField(
+        default=1, ge=0, le=1, description="When the IP-Adapter is last applied (% of total steps)"
+    )

     def invoke(self, context: InvocationContext) -> IPAdapterOutput:
         # Lookup the CLIP Vision encoder that is intended to be used with the IP-Adapter model.
@@ -80,5 +88,7 @@ class IPAdapterInvocation(BaseInvocation):
                 ip_adapter_model=self.ip_adapter_model,
                 image_encoder_model=image_encoder_model,
                 weight=self.weight,
+                begin_step_percent=self.begin_step_percent,
+                end_step_percent=self.end_step_percent,
             ),
         )
@@ -446,6 +446,8 @@ class DenoiseLatentsInvocation(BaseInvocation):
         return IPAdapterData(
             ip_adapter_model=ip_adapter_model,
             weight=ip_adapter.weight,
+            begin_step_percent=ip_adapter.begin_step_percent,
+            end_step_percent=ip_adapter.end_step_percent,
         )

     # original idea by https://github.com/AmericanPresidentJimmyCarter
@@ -131,8 +131,18 @@ class IPAdapter:
         self._attn_processors = attn_procs
         self._state_dict = None

+    # @genomancer: pushed scaling back out into its own method (like original Tencent implementation)
+    # which makes implementing begin_step_percent and end_step_percent easier
+    # but based on self._attn_processors (ala @Ryan) instead of original Tencent unet.attn_processors,
+    # which should make it easier to implement multiple IPAdapters
+    def set_scale(self, scale):
+        if self._attn_processors is not None:
+            for attn_processor in self._attn_processors.values():
+                if isinstance(attn_processor, IPAttnProcessor):
+                    attn_processor.scale = scale
+
     @contextmanager
-    def apply_ip_adapter_attention(self, unet: UNet2DConditionModel, scale: int):
+    def apply_ip_adapter_attention(self, unet: UNet2DConditionModel, scale: float):
         """A context manager that patches `unet` with this IP-Adapter's attention processors while it is active.

         Yields:
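Because set_scale() walks only this adapter's own self._attn_processors (rather than all of unet.attn_processors), two IPAdapter instances patched into the same UNet could in principle be scaled independently. A hypothetical usage sketch (adapter_a and adapter_b are assumed names, not part of this commit):

    adapter_a.set_scale(0.8)  # touches only adapter_a's IPAttnProcessor instances
    adapter_b.set_scale(0.0)  # silences adapter_b without affecting adapter_a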
@@ -143,10 +153,11 @@ class IPAdapter:
         # used on any UNet model (with the same dimensions).
         self._prepare_attention_processors(unet)

-        # Set scale.
-        for attn_processor in self._attn_processors.values():
-            if isinstance(attn_processor, IPAttnProcessor):
-                attn_processor.scale = scale
+        # Set scale
+        self.set_scale(scale)
+        # for attn_processor in self._attn_processors.values():
+        #     if isinstance(attn_processor, IPAttnProcessor):
+        #         attn_processor.scale = scale

         orig_attn_processors = unet.attn_processors
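For orientation, a hedged sketch of how the patched context manager is used; only apply_ip_adapter_attention() and set_scale() come from this diff, while unet and run_denoising_loop() are stand-ins:

    with ip_adapter.apply_ip_adapter_attention(unet, scale=1.0):
        # The UNet's attention processors are patched for the duration of
        # this block; per-step gating later adjusts the scale via set_scale().
        run_denoising_loop()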
@@ -2,6 +2,7 @@ from __future__ import annotations

 from contextlib import nullcontext
 from dataclasses import dataclass
+import math
 from typing import Any, Callable, List, Optional, Union

 import einops
@@ -168,6 +169,8 @@ class IPAdapterData:
     # TODO: change to polymorphic so can do different weights per step (once implemented...)
     # weight: Union[float, List[float]] = Field(default=1.0)
     weight: float = Field(default=1.0)
+    begin_step_percent: float = Field(default=0.0)
+    end_step_percent: float = Field(default=1.0)


 @dataclass
@@ -445,6 +448,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
                     total_step_count=len(timesteps),
                     additional_guidance=additional_guidance,
                     control_data=control_data,
+                    ip_adapter_data=ip_adapter_data,
                 )
                 latents = step_output.prev_sample
@@ -490,6 +494,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
         total_step_count: int,
         additional_guidance: List[Callable] = None,
         control_data: List[ControlNetData] = None,
+        ip_adapter_data: Optional[IPAdapterData] = None,
     ):
         # invokeai_diffuser has batched timesteps, but diffusers schedulers expect a single value
         timestep = t[0]
@@ -500,6 +505,18 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
         # i.e. before or after passing it to InvokeAIDiffuserComponent
         latent_model_input = self.scheduler.scale_model_input(latents, timestep)

+        # handle IP-Adapter
+        if ip_adapter_data is not None:
+            first_adapter_step = math.floor(ip_adapter_data.begin_step_percent * total_step_count)
+            last_adapter_step = math.ceil(ip_adapter_data.end_step_percent * total_step_count)
+            if step_index >= first_adapter_step and step_index <= last_adapter_step:
+                # only apply IP-Adapter if current step is within the IP-Adapter's begin/end step range
+                ip_adapter_data.ip_adapter_model.set_scale(ip_adapter_data.weight)
+            else:
+                # otherwise, set IP-Adapter scale to 0, so it has no effect
+                ip_adapter_data.ip_adapter_model.set_scale(0.0)
+
+        # handle ControlNet(s)
         # default is no controlnet, so set controlnet processing output to None
         controlnet_down_block_samples, controlnet_mid_block_sample = None, None
         if control_data is not None:
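A worked example of the boundary behavior above, with numbers chosen for illustration:

    import math

    total_step_count = 20
    begin_step_percent, end_step_percent = 0.25, 0.75

    first_adapter_step = math.floor(begin_step_percent * total_step_count)  # 5
    last_adapter_step = math.ceil(end_step_percent * total_step_count)      # 15
    active = [s for s in range(total_step_count)
              if first_adapter_step <= s <= last_adapter_step]
    print(len(active))  # 11 -- steps 5 through 15 inclusive

Note that the check is inclusive at both ends, and the ceil() on the upper bound means end_step_percent=1.0 keeps the adapter active through the final step.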