LoRA patching optimization (#6439)

* allow model patcher to optimize away the unpatching step when feasible * remove lazy_offloading functionality * allow model patcher to optimize away the unpatching step when feasible * remove lazy_offloading functionality * do not save original weights if there is a CPU copy of state dict * Update invokeai/backend/model_manager/load/load_base.py Co-authored-by: Ryan Dick <ryanjdick3@gmail.com> * documentation fixes added during penultimate review --------- Co-authored-by: Lincoln Stein <lstein@gmail.com> Co-authored-by: Kent Keirsey <31807370+hipsterusername@users.noreply.github.com> Co-authored-by: Ryan Dick <ryanjdick3@gmail.com>
2024-08-30 20:32:17 +00:00 · 2024-06-06 09:53:35 -04:00
parent 1c5c3cdbd6
commit 2871676f79
7 changed files with 146 additions and 48 deletions
--- a/invokeai/app/invocations/compel.py
+++ b/invokeai/app/invocations/compel.py
@ -81,9 +81,13 @@ class CompelInvocation(BaseInvocation):

        with (
            # apply all patches while the model is on the target device
-            text_encoder_info as text_encoder,
+            text_encoder_info.model_on_device() as (model_state_dict, text_encoder),
            tokenizer_info as tokenizer,
-            ModelPatcher.apply_lora_text_encoder(text_encoder, _lora_loader()),
+            ModelPatcher.apply_lora_text_encoder(
+                text_encoder,
+                loras=_lora_loader(),
+                model_state_dict=model_state_dict,
+            ),
            # Apply CLIP Skip after LoRA to prevent LoRA application from failing on skipped layers.
            ModelPatcher.apply_clip_skip(text_encoder, self.clip.skipped_layers),
            ModelPatcher.apply_ti(tokenizer, text_encoder, ti_list) as (
@ -172,9 +176,14 @@ class SDXLPromptInvocationBase:

        with (
            # apply all patches while the model is on the target device
-            text_encoder_info as text_encoder,
+            text_encoder_info.model_on_device() as (state_dict, text_encoder),
            tokenizer_info as tokenizer,
-            ModelPatcher.apply_lora(text_encoder, _lora_loader(), lora_prefix),
+            ModelPatcher.apply_lora(
+                text_encoder,
+                loras=_lora_loader(),
+                prefix=lora_prefix,
+                model_state_dict=state_dict,
+            ),
            # Apply CLIP Skip after LoRA to prevent LoRA application from failing on skipped layers.
            ModelPatcher.apply_clip_skip(text_encoder, clip_field.skipped_layers),
            ModelPatcher.apply_ti(tokenizer, text_encoder, ti_list) as (