diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index fd7f53df10..cba2dbbc23 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -72,7 +72,7 @@ class FluxTextToImageInvocation(BaseInvocation, WithMetadata, WithBoard): t5_embeddings: torch.Tensor, ): transformer_info = context.models.load(self.transformer.transformer) - inference_dtype = TorchDevice.choose_torch_dtype() + inference_dtype = torch.bfloat16 # Prepare input noise. # TODO(ryand): Does the seed behave the same on different devices? Should we re-implement this to always use a diff --git a/invokeai/app/services/model_install/model_install_default.py b/invokeai/app/services/model_install/model_install_default.py index e1d784f5bf..0369b86fb4 100644 --- a/invokeai/app/services/model_install/model_install_default.py +++ b/invokeai/app/services/model_install/model_install_default.py @@ -783,8 +783,9 @@ class ModelInstallService(ModelInstallServiceBase): # So what we do is to synthesize a folder named "sdxl-turbo_vae" here. if subfolder: top = Path(remote_files[0].path.parts[0]) # e.g. "sdxl-turbo/" - path_to_remove = top / subfolder.parts[-1] # sdxl-turbo/vae/ - path_to_add = Path(f"{top}_{subfolder}") + path_to_remove = top / subfolder # sdxl-turbo/vae/ + subfolder_rename = subfolder.name.replace('/', '_').replace('\\', '_') + path_to_add = Path(f"{top}_{subfolder_rename}") else: path_to_remove = Path(".") path_to_add = Path(".") diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py index 5ef7f460ce..658f626dbb 100644 --- a/invokeai/backend/model_manager/load/model_loaders/flux.py +++ b/invokeai/backend/model_manager/load/model_loaders/flux.py @@ -64,8 +64,7 @@ class FluxVAELoader(GenericDiffusersLoader): params = AutoEncoderParams(**filtered_data) with SilenceWarnings(): - model = load_class(params).to(self._torch_dtype) - # load_sft doesn't support torch.device + model = load_class(params) sd = load_file(model_path) model.load_state_dict(sd, strict=False, assign=True) @@ -203,8 +202,6 @@ class FluxBnbQuantizednf4bCheckpointModel(GenericDiffusersLoader): with accelerate.init_empty_weights(): model = load_class(params) model = quantize_model_nf4(model, modules_to_not_convert=set(), compute_dtype=torch.bfloat16) - # TODO(ryand): Right now, some of the weights are loaded in bfloat16. Think about how best to handle - # this on GPUs without bfloat16 support. sd = load_file(model_path) model.load_state_dict(sd, strict=False, assign=True) return model diff --git a/invokeai/backend/model_manager/util/select_hf_files.py b/invokeai/backend/model_manager/util/select_hf_files.py index 60abc3384c..b0d33d6efb 100644 --- a/invokeai/backend/model_manager/util/select_hf_files.py +++ b/invokeai/backend/model_manager/util/select_hf_files.py @@ -69,7 +69,7 @@ def filter_files( # limit search to subfolder if requested if subfolder: subfolder = root / subfolder - paths = [x for x in paths if x.parent == Path(subfolder)] + paths = [x for x in paths if Path(subfolder) in x.parents] # _filter_by_variant uniquifies the paths and returns a set return sorted(_filter_by_variant(paths, variant)) diff --git a/invokeai/backend/quantization/bnb_nf4.py b/invokeai/backend/quantization/bnb_nf4.py index 28a0861449..105bf1474c 100644 --- a/invokeai/backend/quantization/bnb_nf4.py +++ b/invokeai/backend/quantization/bnb_nf4.py @@ -116,7 +116,7 @@ def _convert_linear_layers_to_nf4( child.in_features, child.out_features, bias=has_bias, - compute_dtype=torch.float16, + compute_dtype=compute_dtype, compress_statistics=compress_statistics, ) if has_bias: