diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py
index 58b4843395..f3e44fc221 100644
--- a/invokeai/backend/model_manager/load/model_loaders/flux.py
+++ b/invokeai/backend/model_manager/load/model_loaders/flux.py
@@ -9,7 +9,7 @@ import accelerate
 import torch
 import yaml
 from safetensors.torch import load_file
-from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5Tokenizer
+from transformers import AutoConfig, AutoModelForTextEncoding, CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5Tokenizer
 
 from invokeai.app.services.config.config_default import get_config
 from invokeai.backend.flux.model import Flux, FluxParams
@@ -33,7 +33,8 @@ from invokeai.backend.model_manager.config import (
 )
 from invokeai.backend.model_manager.load.load_default import ModelLoader
 from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry
-from invokeai.backend.quantization.fast_quantized_transformers_model import FastQuantizedTransformersModel
+from invokeai.backend.quantization.bnb_llm_int8 import quantize_model_llm_int8
+from invokeai.backend.quantization.bnb_nf4 import quantize_model_nf4
 from invokeai.backend.util.silence_warnings import SilenceWarnings
 
 try:
@@ -115,12 +116,33 @@ class T5Encoder8bCheckpointModel(ModelLoader):
             case SubModelType.Tokenizer2:
                 return T5Tokenizer.from_pretrained(Path(config.path) / "tokenizer_2", max_length=512)
             case SubModelType.TextEncoder2:
-                return FastQuantizedTransformersModel.from_pretrained(Path(config.path) / "text_encoder_2")
+                te2_model_path = Path(config.path) / "text_encoder_2"
+                model_config = AutoConfig.from_pretrained(te2_model_path)
+                with accelerate.init_empty_weights():
+                    model = AutoModelForTextEncoding.from_config(model_config)
+                    model = quantize_model_llm_int8(model, modules_to_not_convert=set())
+
+                state_dict_path = te2_model_path / "bnb_llm_int8_model.safetensors"
+                state_dict = load_file(state_dict_path)
+                self._load_state_dict_into_t5(model, state_dict)
+
+                return model
 
         raise ValueError(
             f"Only Tokenizer and TextEncoder submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}"
         )
 
+    @classmethod
+    def _load_state_dict_into_t5(cls, model: T5EncoderModel, state_dict: dict[str, torch.Tensor]):
+        # There is a shared reference to a single weight tensor in the model.
+        # Both "encoder.embed_tokens.weight" and "shared.weight" refer to the same tensor, so only the latter should
+        # be present in the state_dict.
+        missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False, assign=True)
+        assert len(unexpected_keys) == 0
+        assert set(missing_keys) == {"encoder.embed_tokens.weight"}
+        # Assert that the layers we expect to be shared are actually shared.
+        assert model.encoder.embed_tokens.weight is model.shared.weight
+
 
 @ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.T5Encoder, format=ModelFormat.T5Encoder)
 class T5EncoderCheckpointModel(ModelLoader):
diff --git a/invokeai/backend/model_manager/starter_models.py b/invokeai/backend/model_manager/starter_models.py
index 7d5233d767..13a22ee219 100644
--- a/invokeai/backend/model_manager/starter_models.py
+++ b/invokeai/backend/model_manager/starter_models.py
@@ -2,7 +2,7 @@ from typing import Optional
 
 from pydantic import BaseModel
 
-from invokeai.backend.model_manager.config import BaseModelType, ModelType
+from invokeai.backend.model_manager.config import BaseModelType, ModelFormat, ModelType
 
 
 class StarterModelWithoutDependencies(BaseModel):
@@ -11,6 +11,7 @@ class StarterModelWithoutDependencies(BaseModel):
     name: str
     base: BaseModelType
     type: ModelType
+    format: Optional[ModelFormat] = None
     is_installed: bool = False
 
 
@@ -54,17 +55,18 @@ cyberrealistic_negative = StarterModel(
 t5_base_encoder = StarterModel(
     name="t5_base_encoder",
     base=BaseModelType.Any,
-    source="InvokeAI/flux_schnell::t5_xxl_encoder/base",
+    source="InvokeAI/t5-v1_1-xxl::bfloat16",
     description="T5-XXL text encoder (used in FLUX pipelines). ~8GB",
     type=ModelType.T5Encoder,
 )
 
 t5_8b_quantized_encoder = StarterModel(
-    name="t5_8b_quantized_encoder",
+    name="t5_bnb_int8_quantized_encoder",
     base=BaseModelType.Any,
-    source="invokeai/flux_schnell::t5_xxl_encoder/optimum_quanto_qfloat8",
-    description="T5-XXL text encoder with optimum-quanto qfloat8 quantization (used in FLUX pipelines). ~6GB",
+    source="InvokeAI/t5-v1_1-xxl::bnb_llm_int8",
+    description="T5-XXL text encoder with bitsandbytes LLM.int8() quantization (used in FLUX pipelines). ~5GB",
     type=ModelType.T5Encoder,
+    format=ModelFormat.T5Encoder8b,
 )
 
 clip_l_encoder = StarterModel(
diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StartModelsResultItem.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StartModelsResultItem.tsx
index 4fc8390890..bd6a2b4268 100644
--- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StartModelsResultItem.tsx
+++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StartModelsResultItem.tsx
@@ -15,14 +15,14 @@ export const StarterModelsResultItem = memo(({ result }: Props) => {
   const _allSources = [
     {
       source: result.source,
-      config: { name: result.name, description: result.description, type: result.type, base: result.base },
+      config: { name: result.name, description: result.description, type: result.type, base: result.base, format: result.format },
     },
   ];
   if (result.dependencies) {
     for (const d of result.dependencies) {
       _allSources.push({
         source: d.source,
-        config: { name: d.name, description: d.description, type: d.type, base: d.base },
+        config: { name: d.name, description: d.description, type: d.type, base: d.base, format: d.format },
       });
     }
   }
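For reviewers: the new TextEncoder2 branch can be exercised outside the loader. The sketch below mirrors the flux.py hunk above under stated assumptions: an unpacked model directory whose text_encoder_2/ folder already contains the bnb_llm_int8_model.safetensors file produced ahead of time by the quantization step; model_dir is a hypothetical path introduced only for illustration.

# Sketch only: standalone version of the new LLM.int8() T5 loading path.
from pathlib import Path

import accelerate
from safetensors.torch import load_file
from transformers import AutoConfig, AutoModelForTextEncoding

from invokeai.backend.quantization.bnb_llm_int8 import quantize_model_llm_int8

model_dir = Path("/path/to/t5-v1_1-xxl")  # hypothetical install location
te2_path = model_dir / "text_encoder_2"

# Build the model skeleton on the meta device, then swap in bitsandbytes int8
# layers before any real weights are allocated.
model_config = AutoConfig.from_pretrained(te2_path)
with accelerate.init_empty_weights():
    model = AutoModelForTextEncoding.from_config(model_config)
    model = quantize_model_llm_int8(model, modules_to_not_convert=set())

# Load the pre-quantized weights. "encoder.embed_tokens.weight" is tied to
# "shared.weight", so only the latter appears in the state dict.
state_dict = load_file(te2_path / "bnb_llm_int8_model.safetensors")
missing, unexpected = model.load_state_dict(state_dict, strict=False, assign=True)
assert not unexpected
assert set(missing) == {"encoder.embed_tokens.weight"}
assert model.encoder.embed_tokens.weight is model.shared.weight

Note that assign=True (PyTorch 2.1+) is what lets the meta-device skeleton adopt the loaded tensors directly instead of copying into uninitialized storage.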