Mirror of https://github.com/invoke-ai/InvokeAI (synced 2024-08-30 20:32:17 +00:00)
fix: denoise mask incorrectly applied after step
parent 29b04b7e83 · commit 2af9286345
@@ -86,26 +86,27 @@ class AddsMaskGuidance:
     noise: torch.Tensor
     gradient_mask: bool
 
-    def __call__(self, step_output: Union[BaseOutput, SchedulerOutput], t: torch.Tensor, conditioning) -> BaseOutput:
-        output_class = step_output.__class__  # We'll create a new one with masked data.
+    def __call__(self, latents: torch.Tensor, t: torch.Tensor, conditioning) -> torch.Tensor:
+        # output_class = step_output.__class__  # We'll create a new one with masked data.
 
         # The problem with taking SchedulerOutput instead of the model output is that we're less certain what's in it.
         # It's reasonable to assume the first thing is prev_sample, but then does it have other things
         # like pred_original_sample? Should we apply the mask to them too?
         # But what if there's just some other random field?
-        prev_sample = step_output[0]
+        # prev_sample = step_output[0]
         # Mask anything that has the same shape as prev_sample, return others as-is.
-        return output_class(
-            {
-                k: self.apply_mask(v, self._t_for_field(k, t)) if are_like_tensors(prev_sample, v) else v
-                for k, v in step_output.items()
-            }
-        )
+        # return output_class(
+        #     {
+        #         k: self.apply_mask(v, self._t_for_field(k, t)) if are_like_tensors(prev_sample, v) else v
+        #         for k, v in step_output.items()
+        #     }
+        # )
+        return self.apply_mask(latents, t)
 
-    def _t_for_field(self, field_name: str, t):
-        if field_name == "pred_original_sample":
-            return self.scheduler.timesteps[-1]
-        return t
+    # def _t_for_field(self, field_name: str, t):
+    #     if field_name == "pred_original_sample":
+    #         return self.scheduler.timesteps[-1]
+    #     return t
 
     def apply_mask(self, latents: torch.Tensor, t) -> torch.Tensor:
         batch_size = latents.size(0)
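This hunk narrows `AddsMaskGuidance.__call__` from rewrapping an arbitrary `SchedulerOutput` to masking raw latents directly, which sidesteps all the commented-out questions about what else a step output might contain. Below is a hedged sketch of what the new call path reduces to, assuming (as the dataclass fields suggest) that `apply_mask` re-noises the original-image latents to the current timestep with the stored `noise` and blends by the mask; the function name and `mask_latents` argument are illustrative, not InvokeAI's exact code.

import torch

def masked_latents_sketch(latents, mask_latents, mask, noise, scheduler, t):
    # Bring the kept region up to the noise level of the current timestep,
    # so it is statistically consistent with the in-progress latents.
    noised_orig = scheduler.add_noise(mask_latents, noise, t)
    # Keep denoised values where mask == 1, noised originals where mask == 0.
    return torch.lerp(noised_orig, latents, mask.to(dtype=latents.dtype))

Because `__call__` now returns a plain tensor, the guidance can run on the latents before `scheduler.step` instead of patching its output afterwards, which is what the later hunks do.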
@@ -383,8 +384,12 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
         finally:
             self.invokeai_diffuser.model_forward_callback = self._unet_forward
 
-        # restore unmasked part
-        if mask is not None and not gradient_mask:
-            latents = torch.lerp(orig_latents, latents.to(dtype=orig_latents.dtype), mask.to(dtype=orig_latents.dtype))
+        # restore unmasked part after the last step is completed
+        # in-process masking happens before each step
+        if mask is not None:
+            if gradient_mask:
+                latents = torch.where(mask > 0, latents, orig_latents)
+            else:
+                latents = torch.lerp(orig_latents, latents.to(dtype=orig_latents.dtype), mask.to(dtype=orig_latents.dtype))
 
         return latents
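The final restore now distinguishes the two mask types: gradient masks get a hard `torch.where` cutoff, so any pixel the mask ever allowed to change keeps its denoised value, while plain masks blend with `torch.lerp`, so fractional mask values feather the seam. A minimal, self-contained illustration of the difference (shapes and names are made up for the example):

import torch

latents = torch.randn(1, 4, 8, 8)       # denoised result
orig_latents = torch.randn(1, 4, 8, 8)  # encoded original image
mask = torch.rand(1, 1, 8, 8)           # 1.0 = repaint, 0.0 = keep original

# Gradient mask: hard cutoff; any nonzero mask value keeps the denoised pixel.
restored_hard = torch.where(mask > 0, latents, orig_latents)

# Plain mask: proportional blend; a value of 0.3 mixes 30% denoised, 70% original.
restored_soft = torch.lerp(
    orig_latents, latents.to(dtype=orig_latents.dtype), mask.to(dtype=orig_latents.dtype)
)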
@@ -490,6 +495,10 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
         if additional_guidance is None:
             additional_guidance = []
 
+        # one day we will expand this extension point, but for now it just does denoise masking
+        for guidance in additional_guidance:
+            latents = guidance(latents, timestep, conditioning_data)
+
         # TODO: should this scaling happen here or inside self._unet_forward?
         # i.e. before or after passing it to InvokeAIDiffuserComponent
         latent_model_input = self.scheduler.scale_model_input(latents, timestep)
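With this hunk the guidance runs at the top of each denoising step, so the scheduler only ever sees already-masked latents. A hedged sketch of the resulting per-step order; `predict_noise` is a stand-in for the pipeline's UNet forward call, not InvokeAI's actual method name:

def one_step(pipeline, latents, timestep, conditioning_data, additional_guidance):
    # 1. Apply denoise masking to the latents *before* the step
    #    (previously AddsMaskGuidance patched the scheduler output afterwards).
    for guidance in additional_guidance:
        latents = guidance(latents, timestep, conditioning_data)
    # 2. Scheduler-specific input scaling, unchanged from before.
    latent_model_input = pipeline.scheduler.scale_model_input(latents, timestep)
    # 3. Predict noise and advance the sampler exactly as before.
    noise_pred = pipeline.predict_noise(latent_model_input, timestep, conditioning_data)
    return pipeline.scheduler.step(noise_pred, timestep, latents, **conditioning_data.scheduler_args)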
@@ -580,22 +589,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
         # compute the previous noisy sample x_t -> x_t-1
         step_output = self.scheduler.step(noise_pred, timestep, latents, **conditioning_data.scheduler_args)
 
-        # TODO: issue to diffusers?
-        # undo internal counter increment done by scheduler.step, so timestep can be resolved as before call
-        # this needed to be able call scheduler.add_noise with current timestep
-        if self.scheduler.order == 2:
-            self.scheduler._index_counter[timestep.item()] -= 1
-
-        # TODO: this additional_guidance extension point feels redundant with InvokeAIDiffusionComponent.
-        # But the way things are now, scheduler runs _after_ that, so there was
-        # no way to use it to apply an operation that happens after the last scheduler.step.
-        for guidance in additional_guidance:
-            step_output = guidance(step_output, timestep, conditioning_data)
-
-        # restore internal counter
-        if self.scheduler.order == 2:
-            self.scheduler._index_counter[timestep.item()] += 1
-
         return step_output
 
     @staticmethod
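This removal is the payoff of moving the mask ahead of `scheduler.step`: `AddsMaskGuidance` no longer calls `scheduler.add_noise` after the step, so the fragile rollback of the private `_index_counter` on second-order schedulers (and the accompanying TODO about raising the issue with diffusers) can be deleted outright.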