InvokeAI/invokeai/backend/model_manager/load/model_util.py

# Copyright (c) 2024 The InvokeAI Development Team
"""Various utility functions needed by the loader and caching system."""

import json
import logging
from pathlib import Path
from typing import Optional

import torch
from diffusers.pipelines.pipeline_utils import DiffusionPipeline
from diffusers.schedulers.scheduling_utils import SchedulerMixin
from transformers import CLIPTokenizer, T5Tokenizer, T5TokenizerFast

from invokeai.backend.image_util.depth_anything.depth_anything_pipeline import DepthAnythingPipeline
from invokeai.backend.image_util.grounding_dino.grounding_dino_pipeline import GroundingDinoPipeline
from invokeai.backend.image_util.segment_anything.segment_anything_pipeline import SegmentAnythingPipeline
from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
from invokeai.backend.lora import LoRAModelRaw
from invokeai.backend.model_manager.config import AnyModel
from invokeai.backend.onnx.onnx_runtime import IAIOnnxRuntimeModel
from invokeai.backend.spandrel_image_to_image_model import SpandrelImageToImageModel
from invokeai.backend.textual_inversion import TextualInversionModelRaw


def calc_model_size_by_data(logger: logging.Logger, model: AnyModel) -> int:
    """Return the in-memory size of `model`, in bytes.

    The model's type decides how the size is obtained. The checks run in a fixed order, so a model that matches
    several types is handled by the first matching check.

    Args:
        logger: Logger used to emit a warning when the model type is not recognized.
        model: The loaded model object to measure.

    Returns:
        The size in bytes. Schedulers, CLIP tokenizers and unrecognized model types are reported as 0.
    """
    # TODO(ryand): We should create a CacheableModel interface for all models, and move the size calculations down to
    # the models themselves.
    if isinstance(model, DiffusionPipeline):
        return _calc_pipeline_by_data(model)

    if isinstance(model, torch.nn.Module):
        return calc_module_size(model)

    if isinstance(model, IAIOnnxRuntimeModel):
        return _calc_onnx_model_by_data(model)

    # Schedulers are counted as size 0.
    # TODO(ryand): Accurately calculate the CLIP tokenizer's size. It's small enough that it shouldn't matter for now.
    if isinstance(model, (SchedulerMixin, CLIPTokenizer)):
        return 0

    # These model types report their own size through calc_size().
    self_sizing_types = (
        TextualInversionModelRaw,
        IPAdapter,
        LoRAModelRaw,
        SpandrelImageToImageModel,
        GroundingDinoPipeline,
        SegmentAnythingPipeline,
        DepthAnythingPipeline,
    )
    if isinstance(model, self_sizing_types):
        return model.calc_size()

    if isinstance(model, (T5TokenizerFast, T5Tokenizer)):
        # HACK(ryand): len(model) just returns the vocabulary size, so this is blatantly wrong. It should be small
        # relative to the text encoder that it's used with, so shouldn't matter too much, but we should fix this at some
        # point.
        return len(model)

    # TODO(ryand): Promote this from a log to an exception once we are confident that we are handling all of the
    # supported model types.
    logger.warning(
        f"Failed to calculate model size for unexpected model type: {type(model)}. The model will be treated as "
        "having size 0."
    )
    return 0


def _calc_pipeline_by_data(pipeline: DiffusionPipeline) -> int:
    """Sum the sizes (in bytes) of all torch.nn.Module components of a pipeline.

    Each key in `pipeline.components` is looked up as an attribute on the pipeline. Components that are not
    torch.nn.Module instances (including None) contribute nothing to the total.
    """
    assert hasattr(pipeline, "components")
    # Lazily resolve each component so attribute lookup and sizing stay interleaved, one component at a time.
    components = (getattr(pipeline, component_name) for component_name in pipeline.components.keys())
    # isinstance() is False for None, so missing components are skipped without a separate check.
    return sum(calc_module_size(component) for component in components if isinstance(component, torch.nn.Module))


def calc_module_size(model: torch.nn.Module) -> int:
    """Calculate the size (in bytes) of a torch.nn.Module.

    The result is the combined byte count of every tensor returned by `model.parameters()` and `model.buffers()`.
    """

    def _tensor_bytes(tensor: torch.Tensor) -> int:
        # Number of elements times bytes per element.
        return tensor.nelement() * tensor.element_size()

    param_bytes = sum(_tensor_bytes(p) for p in model.parameters())
    buffer_bytes = sum(_tensor_bytes(b) for b in model.buffers())
    total_bytes: int = param_bytes + buffer_bytes
    return total_bytes


def _calc_onnx_model_by_data(model: IAIOnnxRuntimeModel) -> int:
    """Estimate the in-memory size of an ONNX runtime model as twice `model.tensors.size()`.

    NOTE(review): the result is treated as bytes by the caller; confirm that `tensors.size()` reports bytes.
    """
    # The session doubles this
    return model.tensors.size() * 2


def calc_model_size_by_fs(model_path: Path, subfolder: Optional[str] = None, variant: Optional[str] = None) -> int:
    """Estimate the size of a model on disk in bytes.

    Args:
        model_path: Path to a single model file, or to a directory containing model files.
        subfolder: Optional sub-directory of `model_path` to inspect. Ignored when `model_path` is a file.
        variant: Which weight variant to measure. None or "" selects the default files (those that are neither
            fp16 nor 8bit); "fp16" and "8bit" select files whose names carry the matching marker.

    Returns:
        - The file size, when `model_path` is a file.
        - 0, when the (sub)directory does not exist.
        - `metadata.total_size` from a shard index file for the selected variant, when one can be read.
        - Otherwise the summed size of the files of the first weight format found, or 0 if there are none.

    Raises:
        NotImplementedError: If `variant` is not one of the supported values (only checked once the directory is
            known to exist).
    """
    if model_path.is_file():
        return model_path.stat().st_size

    if subfolder is not None:
        model_path = model_path / subfolder

    # This can happen when, for example, the safety checker is not downloaded.
    if not model_path.exists():
        return 0

    # Path.iterdir() yields paths that already include model_path. Joining them onto model_path again would
    # duplicate the prefix for relative paths and make every file look missing.
    all_files = {f for f in model_path.iterdir() if f.is_file()}

    fp16_files = {f for f in all_files if ".fp16." in f.name or ".fp16-" in f.name}
    bit8_files = {f for f in all_files if ".8bit." in f.name or ".8bit-" in f.name}

    if not variant:  # ModelRepoVariant.DEFAULT evaluates to empty string for compatibility with HF
        files = all_files - fp16_files - bit8_files
    elif variant == "fp16":
        files = fp16_files
    elif variant == "8bit":
        files = bit8_files
    else:
        raise NotImplementedError(f"Unknown variant: {variant}")

    # Try to read the total size from a shard index file, if one exists. The default variant may arrive as None
    # or as "", and both must map to the plain ".index.json" postfix.
    index_postfix = f".index.{variant}.json" if variant else ".index.json"

    # Sorted so that the result is deterministic if several index files are present.
    for file in sorted(files):
        if not file.name.endswith(index_postfix):
            continue
        try:
            with open(file, "r", encoding="utf-8") as f:
                index_data = json.load(f)
            return int(index_data["metadata"]["total_size"])
        except (OSError, ValueError, KeyError, TypeError, OverflowError):
            # Best-effort: an unreadable or malformed index falls back to summing file sizes below.
            continue

    # Calculate the size from the weight files if there is no usable index file. Formats are tried in order of
    # preference and only the first format that has any files is counted.
    formats = [
        (".safetensors",),  # safetensors
        (".bin",),  # torch
        (".onnx", ".pb"),  # onnx
        (".msgpack",),  # flax
        (".ckpt",),  # tf
        (".h5",),  # tf2
    ]

    for file_format in formats:
        model_files = [f for f in files if f.suffix in file_format]
        if model_files:
            return sum(f.stat().st_size for f in model_files)

    return 0  # scheduler/feature_extractor/tokenizer - models without loading to gpu
add ram cache module and support files 2024-02-01 04:37:59 +00:00			`# Copyright (c) 2024 The InvokeAI Development Team`
			`"""Various utility functions needed by the loader and caching system."""`

			`import json`
Update calc_model_size_by_data(...) to handle all expected model types, and to log an error if an unexpected model type is received. 2024-07-03 01:14:12 +00:00			`import logging`
add ram cache module and support files 2024-02-01 04:37:59 +00:00			`from pathlib import Path`
added textual inversion and lora loaders 2024-02-05 04:18:00 +00:00			`from typing import Optional`
add ram cache module and support files 2024-02-01 04:37:59 +00:00
			`import torch`
Update calc_model_size_by_data(...) to handle all expected model types, and to log an error if an unexpected model type is received. 2024-07-03 01:14:12 +00:00			`from diffusers.pipelines.pipeline_utils import DiffusionPipeline`
			`from diffusers.schedulers.scheduling_utils import SchedulerMixin`
Run ruff, setup initial text to image node 2024-08-19 14:14:58 +00:00			`from transformers import CLIPTokenizer, T5Tokenizer, T5TokenizerFast`
add ram cache module and support files 2024-02-01 04:37:59 +00:00
fix: Make DepthAnything work with Invoke's Model Management 2024-07-30 22:27:54 +00:00			`from invokeai.backend.image_util.depth_anything.depth_anything_pipeline import DepthAnythingPipeline`
Split invokeai/backend/image_util/segment_anything/ dir into grounding_dino/ and segment_anything/ 2024-07-31 16:28:47 +00:00			`from invokeai.backend.image_util.grounding_dino.grounding_dino_pipeline import GroundingDinoPipeline`
Rename SegmentAnythingModel -> SegmentAnythingPipeline. 2024-08-01 13:57:47 +00:00			`from invokeai.backend.image_util.segment_anything.segment_anything_pipeline import SegmentAnythingPipeline`
Update calc_model_size_by_data(...) to handle all expected model types, and to log an error if an unexpected model type is received. 2024-07-03 01:14:12 +00:00			`from invokeai.backend.ip_adapter.ip_adapter import IPAdapter`
			`from invokeai.backend.lora import LoRAModelRaw`
loaders for main, controlnet, ip-adapter, clipvision and t2i 2024-02-04 22:23:10 +00:00			`from invokeai.backend.model_manager.config import AnyModel`
added textual inversion and lora loaders 2024-02-05 04:18:00 +00:00			`from invokeai.backend.onnx.onnx_runtime import IAIOnnxRuntimeModel`
Fix SpandrelImageToImageModel size calculation for the model cache. 2024-07-03 20:38:16 +00:00			`from invokeai.backend.spandrel_image_to_image_model import SpandrelImageToImageModel`
Update calc_model_size_by_data(...) to handle all expected model types, and to log an error if an unexpected model type is received. 2024-07-03 01:14:12 +00:00			`from invokeai.backend.textual_inversion import TextualInversionModelRaw`
add ram cache module and support files 2024-02-01 04:37:59 +00:00

Update calc_model_size_by_data(...) to handle all expected model types, and to log an error if an unexpected model type is received. 2024-07-03 01:14:12 +00:00			`def calc_model_size_by_data(logger: logging.Logger, model: AnyModel) -> int:`
add ram cache module and support files 2024-02-01 04:37:59 +00:00			`"""Get size of a model in memory in bytes."""`
Update calc_model_size_by_data(...) to handle all expected model types, and to log an error if an unexpected model type is received. 2024-07-03 01:14:12 +00:00			`# TODO(ryand): We should create a CacheableModel interface for all models, and move the size calculations down to`
			`# the models themselves.`
add ram cache module and support files 2024-02-01 04:37:59 +00:00			`if isinstance(model, DiffusionPipeline):`
			`return _calc_pipeline_by_data(model)`
			`elif isinstance(model, torch.nn.Module):`
Update calc_model_size_by_data(...) to handle all expected model types, and to log an error if an unexpected model type is received. 2024-07-03 01:14:12 +00:00			`return calc_module_size(model)`
add ram cache module and support files 2024-02-01 04:37:59 +00:00			`elif isinstance(model, IAIOnnxRuntimeModel):`
			`return _calc_onnx_model_by_data(model)`
Update calc_model_size_by_data(...) to handle all expected model types, and to log an error if an unexpected model type is received. 2024-07-03 01:14:12 +00:00			`elif isinstance(model, SchedulerMixin):`
			`return 0`
			`elif isinstance(model, CLIPTokenizer):`
			`# TODO(ryand): Accurately calculate the tokenizer's size. It's small enough that it shouldn't matter for now.`
			`return 0`
fix: Make DepthAnything work with Invoke's Model Management 2024-07-30 22:27:54 +00:00			`elif isinstance(`
Add a GroundedSamInvocation for image segmentation from a text prompt (Grounding DINO + Segment Anything Model). 2024-07-29 17:53:14 +00:00			`model,`
			`(`
			`TextualInversionModelRaw,`
			`IPAdapter,`
			`LoRAModelRaw,`
			`SpandrelImageToImageModel,`
			`GroundingDinoPipeline,`
Rename SegmentAnythingModel -> SegmentAnythingPipeline. 2024-08-01 13:57:47 +00:00			`SegmentAnythingPipeline,`
Merge branch 'main' into depth_anything_v2 2024-08-02 19:08:57 +00:00			`DepthAnythingPipeline,`
Add a GroundedSamInvocation for image segmentation from a text prompt (Grounding DINO + Segment Anything Model). 2024-07-29 17:53:14 +00:00			`),`
fix: Make DepthAnything work with Invoke's Model Management 2024-07-30 22:27:54 +00:00			`):`
Update calc_model_size_by_data(...) to handle all expected model types, and to log an error if an unexpected model type is received. 2024-07-03 01:14:12 +00:00			`return model.calc_size()`
Manage quantization of models within the loader 2024-08-12 22:01:42 +00:00			`elif isinstance(`
			`model,`
Run ruff, setup initial text to image node 2024-08-19 14:14:58 +00:00			`(`
			`T5TokenizerFast,`
			`T5Tokenizer,`
			`),`
Manage quantization of models within the loader 2024-08-12 22:01:42 +00:00			`):`
Add comment about incorrect T5 Tokenizer size calculation. 2024-08-22 16:09:46 +00:00			`# HACK(ryand): len(model) just returns the vocabulary size, so this is blatantly wrong. It should be small`
			`# relative to the text encoder that it's used with, so shouldn't matter too much, but we should fix this at some`
			`# point.`
Manage quantization of models within the loader 2024-08-12 22:01:42 +00:00			`return len(model)`
add ram cache module and support files 2024-02-01 04:37:59 +00:00			`else:`
Update calc_model_size_by_data(...) to handle all expected model types, and to log an error if an unexpected model type is received. 2024-07-03 01:14:12 +00:00			`# TODO(ryand): Promote this from a log to an exception once we are confident that we are handling all of the`
			`# supported model types.`
Demote error log to warning to models treated as having size 0. 2024-07-09 12:35:43 +00:00			`logger.warning(`
Update calc_model_size_by_data(...) to handle all expected model types, and to log an error if an unexpected model type is received. 2024-07-03 01:14:12 +00:00			`f"Failed to calculate model size for unexpected model type: {type(model)}. The model will be treated as "`
			`"having size 0."`
			`)`
add ram cache module and support files 2024-02-01 04:37:59 +00:00			`return 0`


			`def _calc_pipeline_by_data(pipeline: DiffusionPipeline) -> int:`
			`res = 0`
			`assert hasattr(pipeline, "components")`
			`for submodel_key in pipeline.components.keys():`
			`submodel = getattr(pipeline, submodel_key)`
			`if submodel is not None and isinstance(submodel, torch.nn.Module):`
Update calc_model_size_by_data(...) to handle all expected model types, and to log an error if an unexpected model type is received. 2024-07-03 01:14:12 +00:00			`res += calc_module_size(submodel)`
add ram cache module and support files 2024-02-01 04:37:59 +00:00			`return res`


Update calc_model_size_by_data(...) to handle all expected model types, and to log an error if an unexpected model type is received. 2024-07-03 01:14:12 +00:00			`def calc_module_size(model: torch.nn.Module) -> int:`
			`"""Calculate the size (in bytes) of a torch.nn.Module."""`
add ram cache module and support files 2024-02-01 04:37:59 +00:00			`mem_params = sum([param.nelement() * param.element_size() for param in model.parameters()])`
			`mem_bufs = sum([buf.nelement() * buf.element_size() for buf in model.buffers()])`
			`mem: int = mem_params + mem_bufs # in bytes`
			`return mem`


			`def _calc_onnx_model_by_data(model: IAIOnnxRuntimeModel) -> int:`
			`tensor_size = model.tensors.size() * 2 # The session doubles this`
			`mem = tensor_size # in bytes`
			`return mem`


			`def calc_model_size_by_fs(model_path: Path, subfolder: Optional[str] = None, variant: Optional[str] = None) -> int:`
			`"""Estimate the size of a model on disk in bytes."""`
model loading and conversion implemented for vaes 2024-02-04 03:55:09 +00:00			`if model_path.is_file():`
			`return model_path.stat().st_size`
loaders for main, controlnet, ip-adapter, clipvision and t2i 2024-02-04 22:23:10 +00:00
add ram cache module and support files 2024-02-01 04:37:59 +00:00			`if subfolder is not None:`
			`model_path = model_path / subfolder`

			`# this can happen when, for example, the safety checker is not downloaded.`
			`if not model_path.exists():`
			`return 0`

			`all_files = [f for f in model_path.iterdir() if (model_path / f).is_file()]`

			`fp16_files = {f for f in all_files if ".fp16." in f.name or ".fp16-" in f.name}`
			`bit8_files = {f for f in all_files if ".8bit." in f.name or ".8bit-" in f.name}`
			`other_files = set(all_files) - fp16_files - bit8_files`

make model manager v2 ready for PR review - Replace legacy model manager service with the v2 manager. - Update invocations to use new load interface. - Fixed many but not all type checking errors in the invocations. Most were unrelated to model manager - Updated routes. All the new routes live under the route tag `model_manager_v2`. To avoid confusion with the old routes, they have the URL prefix `/api/v2/models`. The old routes have been de-registered. - Added a pytest for the loader. - Updated documentation in contributing/MODEL_MANAGER.md 2024-02-10 23:09:45 +00:00			`if not variant: # ModelRepoVariant.DEFAULT evaluates to empty string for compatibility with HF`
add ram cache module and support files 2024-02-01 04:37:59 +00:00			`files = other_files`
			`elif variant == "fp16":`
			`files = fp16_files`
			`elif variant == "8bit":`
			`files = bit8_files`
			`else:`
			`raise NotImplementedError(f"Unknown variant: {variant}")`

			`# try read from index if exists`
			`index_postfix = ".index.json"`
			`if variant is not None:`
			`index_postfix = f".index.{variant}.json"`

			`for file in files:`
			`if not file.name.endswith(index_postfix):`
			`continue`
			`try:`
			`with open(model_path / file, "r") as f:`
			`index_data = json.loads(f.read())`
			`return int(index_data["metadata"]["total_size"])`
			`except Exception:`
			`pass`

			`# calculate files size if there is no index file`
			`formats = [`
			`(".safetensors",), # safetensors`
			`(".bin",), # torch`
			`(".onnx", ".pb"), # onnx`
			`(".msgpack",), # flax`
			`(".ckpt",), # tf`
			`(".h5",), # tf2`
			`]`

			`for file_format in formats:`
			`model_files = [f for f in files if f.suffix in file_format]`
			`if len(model_files) == 0:`
			`continue`

			`model_size = 0`
			`for model_file in model_files:`
			`file_stats = (model_path / model_file).stat()`
			`model_size += file_stats.st_size`
			`return model_size`

			`return 0 # scheduler/feature_extractor/tokenizer - models without loading to gpu`