diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py
index 40c77f5982..79613b7602 100644
--- a/invokeai/backend/model_manager/load/model_loaders/flux.py
+++ b/invokeai/backend/model_manager/load/model_loaders/flux.py
@@ -90,9 +90,9 @@ class ClipCheckpointModel(ModelLoader):
 
         match submodel_type:
             case SubModelType.Tokenizer:
-                return CLIPTokenizer.from_pretrained(config.path)
+                return CLIPTokenizer.from_pretrained(Path(config.path) / "tokenizer")
             case SubModelType.TextEncoder:
-                return CLIPTextModel.from_pretrained(config.path)
+                return CLIPTextModel.from_pretrained(Path(config.path) / "text_encoder")
 
         raise ValueError(
             f"Only Tokenizer and TextEncoder submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}"
diff --git a/invokeai/backend/model_manager/starter_models.py b/invokeai/backend/model_manager/starter_models.py
index 69b9c1bd27..d08fc9fc97 100644
--- a/invokeai/backend/model_manager/starter_models.py
+++ b/invokeai/backend/model_manager/starter_models.py
@@ -72,8 +72,8 @@ t5_8b_quantized_encoder = StarterModel(
 clip_l_encoder = StarterModel(
     name="clip-vit-large-patch14",
     base=BaseModelType.Any,
-    source="openai/clip-vit-large-patch14",
-    description="CLIP-L text encoder (used in FLUX pipelines). ~3GB",
+    source="InvokeAI/clip-vit-large-patch14-text-encoder::bfloat16",
+    description="CLIP-L text encoder (used in FLUX pipelines). ~250MB",
     type=ModelType.CLIPEmbed,
 )
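
The loader change assumes the installed CLIP-L model uses a diffusers-style directory layout, with the tokenizer and text encoder in their own subfolders rather than at the repo root (which matches the new `InvokeAI/clip-vit-large-patch14-text-encoder::bfloat16` starter source). Below is a minimal sketch of that loading path; `model_dir` and the prompt-encoding lines are illustrative assumptions, not code from this PR.

```python
from pathlib import Path

from transformers import CLIPTextModel, CLIPTokenizer

# Hypothetical local install path standing in for `config.path`.
# Assumed layout:
#   <model_dir>/tokenizer/      (tokenizer files)
#   <model_dir>/text_encoder/   (config.json, model weights)
model_dir = Path("/path/to/models/clip-vit-large-patch14-text-encoder")

# Mirror the two submodel cases in ClipCheckpointModel._load_model.
tokenizer = CLIPTokenizer.from_pretrained(model_dir / "tokenizer")
text_encoder = CLIPTextModel.from_pretrained(model_dir / "text_encoder")

# Encode a prompt with the pair, roughly how FLUX pipelines consume CLIP-L.
tokens = tokenizer(
    "a photo of a cat", padding="max_length", max_length=77, return_tensors="pt"
)
clip_embeds = text_encoder(tokens.input_ids).pooler_output
```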