From d23ad1818df352633443d3cb463f1f28ae6b89eb Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Fri, 9 Aug 2024 16:39:43 +0000
Subject: [PATCH] Make quantized loading fast.

---
 invokeai/backend/requantize.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/invokeai/backend/requantize.py b/invokeai/backend/requantize.py
index 0e9356b60b..5f506f487d 100644
--- a/invokeai/backend/requantize.py
+++ b/invokeai/backend/requantize.py
@@ -1,14 +1,13 @@
 from typing import Any, Dict
 
 import torch
-from optimum.quanto.nn import QModuleMixin
-from optimum.quanto.quantize import _quantize_submodule, freeze
+from optimum.quanto.quantize import _quantize_submodule
 
-
-def custom_freeze(model: torch.nn.Module):
-    for name, m in model.named_modules():
-        if isinstance(m, QModuleMixin):
-            m.freeze()
+# def custom_freeze(model: torch.nn.Module):
+#     for name, m in model.named_modules():
+#         if isinstance(m, QModuleMixin):
+#             m.weight =
+#             m.freeze()
 
 
 def requantize(
@@ -47,8 +46,8 @@ def requantize(
         for name, param in m.named_buffers(recurse=False):
             setattr(m, name, move_tensor(param, "cpu"))
     # Freeze model and move to target device
-    freeze(model)
-    model.to(device)
+    # freeze(model)
+    # model.to(device)
 
     # Load the quantized model weights
     model.load_state_dict(state_dict, strict=False)