diff --git a/invokeai/backend/stable_diffusion/denoise_context.py b/invokeai/backend/stable_diffusion/denoise_context.py index 26c3b02c3b..bcebb2945e 100644 --- a/invokeai/backend/stable_diffusion/denoise_context.py +++ b/invokeai/backend/stable_diffusion/denoise_context.py @@ -31,6 +31,30 @@ class UNetKwargs: @dataclass class DenoiseInputs: + """Initial variables passed to denoise. Supposed to be unchanged. + + Variables: + orig_latents: The latent-space image to denoise. + Shape: [batch, channels, latent_height, latent_width] + - If we are inpainting, this is the initial latent image before noise has been added. + - If we are generating a new image, this should be initialized to zeros. + - In some cases, this may be a partially-noised latent image (e.g. when running the SDXL refiner). + scheduler_step_kwargs: kwargs forwarded to the scheduler.step() method. + conditioning_data: Text conditioning data. + noise: Noise used for two purposes: + Shape: [1 or batch, channels, latent_height, latent_width] + 1. Used by the scheduler to noise the initial `latents` before denoising. + 2. Used to noise the `masked_latents` when inpainting. + `noise` should be None if the `latents` tensor has already been noised. + seed: The seed used to generate the noise for the denoising process. + HACK(ryand): seed is only used in a particular case when `noise` is None, but we need to re-generate the + same noise used earlier in the pipeline. This should really be handled in a clearer way. + timesteps: The timestep schedule for the denoising process. + init_timestep: The first timestep in the schedule. This is used to determine the initial noise level, so + should be populated if you want noise applied *even* if timesteps is empty. + attention_processor_cls: Class of attention processor that is used.
+ """ + orig_latents: torch.Tensor scheduler_step_kwargs: dict[str, Any] conditioning_data: TextConditioningData @@ -43,6 +67,41 @@ class DenoiseInputs: @dataclass class DenoiseContext: + """Context with all variables in denoise. + + Variables: + inputs: Initial variables passed to denoise. Supposed to be unchanged. + scheduler: Scheduler which is used to apply noise predictions. + unet: UNet model. + latents: Current state of latent-space image in denoising process. + None until `pre_denoise_loop` callback. + Shape: [batch, channels, latent_height, latent_width] + step_index: Current denoising step index. + None until `pre_step` callback. + timestep: Current denoising step timestep. + None until `pre_step` callback. + unet_kwargs: Arguments which will be passed to UNet model. + Available in `pre_unet`/`post_unet` callbacks, otherwise will be None. + step_output: SchedulerOutput class returned from step function (normally generated by the scheduler). + Supposed to be used only in `post_step` callback, otherwise can be None. + latent_model_input: Scaled version of `latents`, which will be passed to unet_kwargs initialization. + Available in events inside step (between `pre_step` and `post_step`). + Shape: [batch, channels, latent_height, latent_width] + conditioning_mode: [TMP] Defines on which conditionings current unet call will be run. + Available in `pre_unet`/`post_unet` callbacks, otherwise will be None. + Can be "negative", "positive" or "both". + negative_noise_pred: [TMP] Noise predictions from negative conditioning. + Available in `apply_cfg` and `post_apply_cfg` callbacks, otherwise will be None. + Shape: [batch, channels, latent_height, latent_width] + positive_noise_pred: [TMP] Noise predictions from positive conditioning. + Available in `apply_cfg` and `post_apply_cfg` callbacks, otherwise will be None. + Shape: [batch, channels, latent_height, latent_width] + noise_pred: Combined noise prediction from passed conditionings.
+ Available in `apply_cfg` and `post_apply_cfg` callbacks, otherwise will be None. + Shape: [batch, channels, latent_height, latent_width] + extra: Dictionary for extensions to pass extra info about denoise process to other extensions. + """ + inputs: DenoiseInputs scheduler: SchedulerMixin