Update HF download logic to work for black-forest-labs/FLUX.1-schnell.

2024-08-30 20:32:17 +00:00 · 2024-08-06 19:34:49 +00:00 · 2024-08-06 19:34:49 +00:00 · 7d447cbb88
commit 7d447cbb88
parent 3bbba7e4b1
2 changed files with 99 additions and 2 deletions
--- a/invokeai/backend/model_manager/util/select_hf_files.py
+++ b/invokeai/backend/model_manager/util/select_hf_files.py
@ -54,6 +54,7 @@ def filter_files(
                "lora_weights.safetensors",
                "weights.pb",
                "onnx_data",
                "spiece.model", # Added for `black-forest-labs/FLUX.1-schnell`.
            )
        ):
            paths.append(file)
@ -62,7 +63,7 @@ def filter_files(
        # downloading random checkpoints that might also be in the repo. However there is no guarantee
        # that a checkpoint doesn't contain "model" in its name, and no guarantee that future diffusers models
        # will adhere to this naming convention, so this is an area to be careful of.
-        elif re.search(r"model(\.[^.]+)?\.(safetensors|bin|onnx|xml|pth|pt|ckpt|msgpack)$", file.name):
+        elif re.search(r"model.*\.(safetensors|bin|onnx|xml|pth|pt|ckpt|msgpack)$", file.name):
            paths.append(file)
    # limit search to subfolder if requested
@ -97,7 +98,9 @@ def _filter_by_variant(files: List[Path], variant: ModelRepoVariant) -> Set[Path
            if variant == ModelRepoVariant.Flax:
                result.add(path)
-        elif path.suffix in [".json", ".txt"]:
+        # Note: '.model' was added to support:
        # https://huggingface.co/black-forest-labs/FLUX.1-schnell/blob/768d12a373ed5cc9ef9a9dea7504dc09fcc14842/tokenizer_2/spiece.model
        elif path.suffix in [".json", ".txt", ".model"]:
            result.add(path)
        elif variant in [
@ -140,6 +143,23 @@ def _filter_by_variant(files: List[Path], variant: ModelRepoVariant) -> Set[Path
            continue
    for candidate_list in subfolder_weights.values():
        # Check if at least one of the files has the explicit fp16 variant.
        at_least_one_fp16 = False
        for candidate in candidate_list:
            if len(candidate.path.suffixes) == 2 and candidate.path.suffixes[0] == ".fp16":
                at_least_one_fp16 = True
                break
        if not at_least_one_fp16:
            # If none of the candidates in this candidate_list have the explicit fp16 variant label, then this
            # candidate_list probably doesn't adhere to the variant naming convention that we expected. In this case,
            # we'll simply keep all the candidates. An example of a model that hits this case is
            # `black-forest-labs/FLUX.1-schnell` (as of commit 012d2fd).
            for candidate in candidate_list:
                result.add(candidate.path)
        # The candidate_list seems to have the expected variant naming convention. We'll select the highest scoring
        # candidate.
        highest_score_candidate = max(candidate_list, key=lambda candidate: candidate.score)
        if highest_score_candidate:
            result.add(highest_score_candidate.path)
--- a/tests/backend/model_manager/util/test_hf_model_select.py
+++ b/tests/backend/model_manager/util/test_hf_model_select.py
@ -326,3 +326,80 @@ def test_select_multiple_weights(
 ) -> None:
    filtered_files = filter_files(sd15_test_files, variant)
    assert set(filtered_files) == {Path(f) for f in expected_files}
@pytest.fixture
 def flux_schnell_test_files() -> list[Path]:
    return [
        Path(f)
        for f in [
            "FLUX.1-schnell/.gitattributes",
            "FLUX.1-schnell/README.md",
            "FLUX.1-schnell/ae.safetensors",
            "FLUX.1-schnell/flux1-schnell.safetensors",
            "FLUX.1-schnell/model_index.json",
            "FLUX.1-schnell/scheduler/scheduler_config.json",
            "FLUX.1-schnell/schnell_grid.jpeg",
            "FLUX.1-schnell/text_encoder/config.json",
            "FLUX.1-schnell/text_encoder/model.safetensors",
            "FLUX.1-schnell/text_encoder_2/config.json",
            "FLUX.1-schnell/text_encoder_2/model-00001-of-00002.safetensors",
            "FLUX.1-schnell/text_encoder_2/model-00002-of-00002.safetensors",
            "FLUX.1-schnell/text_encoder_2/model.safetensors.index.json",
            "FLUX.1-schnell/tokenizer/merges.txt",
            "FLUX.1-schnell/tokenizer/special_tokens_map.json",
            "FLUX.1-schnell/tokenizer/tokenizer_config.json",
            "FLUX.1-schnell/tokenizer/vocab.json",
            "FLUX.1-schnell/tokenizer_2/special_tokens_map.json",
            "FLUX.1-schnell/tokenizer_2/spiece.model",
            "FLUX.1-schnell/tokenizer_2/tokenizer.json",
            "FLUX.1-schnell/tokenizer_2/tokenizer_config.json",
            "FLUX.1-schnell/transformer/config.json",
            "FLUX.1-schnell/transformer/diffusion_pytorch_model-00001-of-00003.safetensors",
            "FLUX.1-schnell/transformer/diffusion_pytorch_model-00002-of-00003.safetensors",
            "FLUX.1-schnell/transformer/diffusion_pytorch_model-00003-of-00003.safetensors",
            "FLUX.1-schnell/transformer/diffusion_pytorch_model.safetensors.index.json",
            "FLUX.1-schnell/vae/config.json",
            "FLUX.1-schnell/vae/diffusion_pytorch_model.safetensors",
        ]
    ]
@pytest.mark.parametrize(
    ["variant", "expected_files"],
    [
        (
            ModelRepoVariant.Default,
            [
                "FLUX.1-schnell/model_index.json",
                "FLUX.1-schnell/scheduler/scheduler_config.json",
                "FLUX.1-schnell/text_encoder/config.json",
                "FLUX.1-schnell/text_encoder/model.safetensors",
                "FLUX.1-schnell/text_encoder_2/config.json",
                "FLUX.1-schnell/text_encoder_2/model-00001-of-00002.safetensors",
                "FLUX.1-schnell/text_encoder_2/model-00002-of-00002.safetensors",
                "FLUX.1-schnell/text_encoder_2/model.safetensors.index.json",
                "FLUX.1-schnell/tokenizer/merges.txt",
                "FLUX.1-schnell/tokenizer/special_tokens_map.json",
                "FLUX.1-schnell/tokenizer/tokenizer_config.json",
                "FLUX.1-schnell/tokenizer/vocab.json",
                "FLUX.1-schnell/tokenizer_2/special_tokens_map.json",
                "FLUX.1-schnell/tokenizer_2/spiece.model",
                "FLUX.1-schnell/tokenizer_2/tokenizer.json",
                "FLUX.1-schnell/tokenizer_2/tokenizer_config.json",
                "FLUX.1-schnell/transformer/config.json",
                "FLUX.1-schnell/transformer/diffusion_pytorch_model-00001-of-00003.safetensors",
                "FLUX.1-schnell/transformer/diffusion_pytorch_model-00002-of-00003.safetensors",
                "FLUX.1-schnell/transformer/diffusion_pytorch_model-00003-of-00003.safetensors",
                "FLUX.1-schnell/transformer/diffusion_pytorch_model.safetensors.index.json",
                "FLUX.1-schnell/vae/config.json",
                "FLUX.1-schnell/vae/diffusion_pytorch_model.safetensors",
            ],
        ),
    ],
 )
 def test_select_flux_schnell_files(
    flux_schnell_test_files: list[Path], variant: ModelRepoVariant, expected_files: list[str]
 ) -> None:
    filtered_files = filter_files(flux_schnell_test_files, variant)
    assert set(filtered_files) == {Path(f) for f in expected_files}