A very primitive working version of PEFT patching. It is very slow. LoRAs don't get unloaded yet, so it can only be run once. The results are also *slightly* different from the old implementation. I suspect this is because the LoRA weight is not being applied to the UNet, but there could be other issues as well.

2024-08-30 20:32:17 +00:00 · 2024-04-05 12:02:05 -04:00
parent 22c66cf55b
commit f9fda503a3
3 changed files with 168 additions and 8 deletions
--- a/invokeai/app/invocations/compel.py
+++ b/invokeai/app/invocations/compel.py
@ -78,7 +78,7 @@ class CompelInvocation(BaseInvocation):
            ),
            text_encoder_info as text_encoder,
            # Apply the LoRA after text_encoder has been moved to its target device for faster patching.
-            PeftModelPatcher.apply_peft_patch(text_encoder, _lora_loader(), "text_encoder"),
+            PeftModelPatcher.apply_peft_model_to_text_encoder(text_encoder, _lora_loader(), "text_encoder"),
            # Apply CLIP Skip after LoRA to prevent LoRA application from failing on skipped layers.
            ModelPatcher.apply_clip_skip(text_encoder_model, self.clip.skipped_layers),
        ):
@ -176,7 +176,7 @@ class SDXLPromptInvocationBase:
            ),
            text_encoder_info as text_encoder,
            # Apply the LoRA after text_encoder has been moved to its target device for faster patching.
-            PeftModelPatcher.apply_peft_patch(text_encoder, _lora_loader(), lora_prefix),
+            PeftModelPatcher.apply_peft_model_to_text_encoder(text_encoder, _lora_loader(), lora_prefix),
            # Apply CLIP Skip after LoRA to prevent LoRA application from failing on skipped layers.
            ModelPatcher.apply_clip_skip(text_encoder_model, clip_field.skipped_layers),
        ):
--- a/invokeai/app/invocations/latent.py
+++ b/invokeai/app/invocations/latent.py
@ -48,9 +48,10 @@ from invokeai.app.invocations.t2i_adapter import T2IAdapterField
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.app.util.controlnet_utils import prepare_control_image
 from invokeai.backend.ip_adapter.ip_adapter import IPAdapter, IPAdapterPlus
-from invokeai.backend.lora import LoRAModelRaw
 from invokeai.backend.model_manager import BaseModelType, LoadedModel
 from invokeai.backend.model_patcher import ModelPatcher
+from invokeai.backend.peft.peft_model import PeftModel
+from invokeai.backend.peft.peft_model_patcher import PeftModelPatcher
 from invokeai.backend.stable_diffusion import PipelineIntermediateState, set_seamless
 from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningData, IPAdapterConditioningInfo
 from invokeai.backend.util.silence_warnings import SilenceWarnings
@ -714,13 +715,12 @@ class DenoiseLatentsInvocation(BaseInvocation):
            def step_callback(state: PipelineIntermediateState) -> None:
                context.util.sd_step_callback(state, unet_config.base)

-            def _lora_loader() -> Iterator[Tuple[LoRAModelRaw, float]]:
+            def _lora_loader() -> Iterator[Tuple[PeftModel, float]]:
                for lora in self.unet.loras:
                    lora_info = context.models.load(lora.lora)
-                    assert isinstance(lora_info.model, LoRAModelRaw)
+                    assert isinstance(lora_info.model, PeftModel)
                    yield (lora_info.model, lora.weight)
                    del lora_info
-                return

            unet_info = context.models.load(self.unet.unet)
            assert isinstance(unet_info.model, UNet2DConditionModel)
@ -730,7 +730,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
                set_seamless(unet_info.model, self.unet.seamless_axes),  # FIXME
                unet_info as unet,
                # Apply the LoRA after unet has been moved to its target device for faster patching.
-                ModelPatcher.apply_lora_unet(unet, _lora_loader()),
+                PeftModelPatcher.apply_peft_model_to_unet(unet, _lora_loader()),
            ):
                assert isinstance(unet, UNet2DConditionModel)
                latents = latents.to(device=unet.device, dtype=unet.dtype)
--- a/invokeai/backend/peft/peft_model_patcher.py
+++ b/invokeai/backend/peft/peft_model_patcher.py
@ -4,11 +4,171 @@ from contextlib import contextmanager
 from typing import Iterator, Tuple

 import torch
+from diffusers.models.lora import text_encoder_attn_modules, text_encoder_mlp_modules
+from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
+from diffusers.utils.peft_utils import get_peft_kwargs, scale_lora_layers
+from diffusers.utils.state_dict_utils import convert_state_dict_to_peft, convert_unet_state_dict_to_peft
+from peft import LoraConfig, inject_adapter_in_model, set_peft_model_state_dict

 from invokeai.backend.peft.peft_model import PeftModel

+UNET_NAME = "unet"
+

 class PeftModelPatcher:
+    @classmethod
+    @contextmanager
+    @torch.no_grad()
+    def apply_peft_model_to_text_encoder(
+        cls,
+        text_encoder: torch.nn.Module,
+        peft_models: Iterator[Tuple[PeftModel, float]],
+        prefix: str,
+    ):
+        original_weights = {}
+
+        try:
+            for peft_model, peft_model_weight in peft_models:
+                keys = list(peft_model.state_dict.keys())
+
+                # Load the layers corresponding to text encoder and make necessary adjustments.
+                text_encoder_keys = [k for k in keys if k.startswith(prefix) and k.split(".")[0] == prefix]
+                text_encoder_lora_state_dict = {
+                    k.replace(f"{prefix}.", ""): v for k, v in peft_model.state_dict.items() if k in text_encoder_keys
+                }
+
+                if len(text_encoder_lora_state_dict) == 0:
+                    continue
+
+                if peft_model.name in getattr(text_encoder, "peft_config", {}):
+                    raise ValueError(f"Adapter name {peft_model.name} already in use in the text encoder ({prefix}).")
+
+                rank = {}
+                # TODO(ryand): Is this necessary?
+                # text_encoder_lora_state_dict = convert_state_dict_to_diffusers(text_encoder_lora_state_dict)
+
+                text_encoder_lora_state_dict = convert_state_dict_to_peft(text_encoder_lora_state_dict)
+
+                for name, _ in text_encoder_attn_modules(text_encoder):
+                    rank_key = f"{name}.out_proj.lora_B.weight"
+                    rank[rank_key] = text_encoder_lora_state_dict[rank_key].shape[1]
+
+                patch_mlp = any(".mlp." in key for key in text_encoder_lora_state_dict.keys())
+                if patch_mlp:
+                    for name, _ in text_encoder_mlp_modules(text_encoder):
+                        rank_key_fc1 = f"{name}.fc1.lora_B.weight"
+                        rank_key_fc2 = f"{name}.fc2.lora_B.weight"
+
+                        rank[rank_key_fc1] = text_encoder_lora_state_dict[rank_key_fc1].shape[1]
+                        rank[rank_key_fc2] = text_encoder_lora_state_dict[rank_key_fc2].shape[1]
+
+                network_alphas = peft_model.network_alphas
+                if network_alphas is not None:
+                    alpha_keys = [
+                        k for k in network_alphas.keys() if k.startswith(prefix) and k.split(".")[0] == prefix
+                    ]
+                    network_alphas = {
+                        k.replace(f"{prefix}.", ""): v for k, v in network_alphas.items() if k in alpha_keys
+                    }
+
+                lora_config_kwargs = get_peft_kwargs(rank, network_alphas, text_encoder_lora_state_dict, is_unet=False)
+                lora_config_kwargs["inference_mode"] = True
+                lora_config = LoraConfig(**lora_config_kwargs)
+
+                new_text_encoder = inject_adapter_in_model(lora_config, text_encoder, peft_model.name)
+                incompatible_keys = set_peft_model_state_dict(
+                    new_text_encoder, text_encoder_lora_state_dict, peft_model.name
+                )
+                if incompatible_keys is not None:
+                    # check only for unexpected keys
+                    unexpected_keys = getattr(incompatible_keys, "unexpected_keys", None)
+                    if unexpected_keys:
+                        raise ValueError(f"Failed to inject unexpected PEFT keys: {unexpected_keys}")
+
+                # inject LoRA layers and load the state dict
+                # in transformers we automatically check whether the adapter name is already in use or not
+                # text_encoder.load_adapter(
+                #     adapter_name=adapter_name,
+                #     adapter_state_dict=text_encoder_lora_state_dict,
+                #     peft_config=lora_config,
+                # )
+
+                scale_lora_layers(text_encoder, weight=peft_model_weight)
+                text_encoder.to(device=text_encoder.device, dtype=text_encoder.dtype)
+
+            yield
+        finally:
+            # TODO
+            pass
+            # for module_key, weight in original_weights.items():
+            #     model.get_submodule(module_key).weight.copy_(weight)
+
+    @classmethod
+    @contextmanager
+    @torch.no_grad()
+    def apply_peft_model_to_unet(
+        cls,
+        unet: UNet2DConditionModel,
+        peft_models: Iterator[Tuple[PeftModel, float]],
+    ):
+        try:
+            for peft_model, peft_model_weight in peft_models:
+                keys = list(peft_model.state_dict.keys())
+
+                unet_keys = [k for k in keys if k.startswith(UNET_NAME)]
+                state_dict = {
+                    k.replace(f"{UNET_NAME}.", ""): v for k, v in peft_model.state_dict.items() if k in unet_keys
+                }
+
+                network_alphas = peft_model.network_alphas
+                if network_alphas is not None:
+                    alpha_keys = [k for k in network_alphas.keys() if k.startswith(UNET_NAME)]
+                    network_alphas = {
+                        k.replace(f"{UNET_NAME}.", ""): v for k, v in network_alphas.items() if k in alpha_keys
+                    }
+
+                if len(state_dict) == 0:
+                    continue
+
+                if peft_model.name in getattr(unet, "peft_config", {}):
+                    raise ValueError(f"Adapter name {peft_model.name} already in use in the Unet.")
+
+                state_dict = convert_unet_state_dict_to_peft(state_dict)
+
+                if network_alphas is not None:
+                    # The alphas state dict have the same structure as Unet, thus we convert it to peft format using
+                    # `convert_unet_state_dict_to_peft` method.
+                    network_alphas = convert_unet_state_dict_to_peft(network_alphas)
+
+                rank = {}
+                for key, val in state_dict.items():
+                    if "lora_B" in key:
+                        rank[key] = val.shape[1]
+
+                lora_config_kwargs = get_peft_kwargs(rank, network_alphas, state_dict, is_unet=True)
+                lora_config_kwargs["inference_mode"] = True
+                lora_config = LoraConfig(**lora_config_kwargs)
+
+                inject_adapter_in_model(lora_config, unet, adapter_name=peft_model.name)
+                incompatible_keys = set_peft_model_state_dict(unet, state_dict, peft_model.name)
+                if incompatible_keys is not None:
+                    # check only for unexpected keys
+                    unexpected_keys = getattr(incompatible_keys, "unexpected_keys", None)
+                    if unexpected_keys:
+                        raise ValueError(f"Failed to inject unexpected PEFT keys: {unexpected_keys}")
+
+                # TODO(ryand): What does this do?
+                unet.load_attn_procs(state_dict, network_alphas=network_alphas, low_cpu_mem_usage=True)
+
+                # TODO(ryand): Apply the lora weight. Where does diffusers do this? They don't seem to do it when they
+                # patch the UNet.
+            yield
+        finally:
+            # TODO
+            pass
+            # for module_key, weight in original_weights.items():
+            #     model.get_submodule(module_key).weight.copy_(weight)
+
    @classmethod
    @contextmanager
    @torch.no_grad()
@ -28,8 +188,8 @@ class PeftModelPatcher:
                        continue

                    module_key = layer_key.replace(prefix + ".", "")
-                    module_key = module_key.split
                    # TODO(ryand): Make this work.
+
                    module = model_state_dict[module_key]

                    # All of the LoRA weight calculations will be done on the same device as the module weight.