fix: Make DepthAnything work with Invoke's Model Management

blessedcoolant
2024-07-31 03:57:54 +05:30
parent f170697ebe
commit 18f89ed5ed
4 changed files with 47 additions and 5 deletions

invokeai/backend/image_util/depth_anything/depth_anything_pipeline.py (new file)

@@ -0,0 +1,26 @@
+from typing import cast
+
+from PIL import Image
+from transformers.pipelines import DepthEstimationPipeline
+
+
+class DepthAnythingPipeline:
+    """Custom wrapper for the Depth Estimation pipeline from transformers adding compatibility
+    for Invoke's Model Management System"""
+
+    def __init__(self, pipeline: DepthEstimationPipeline) -> None:
+        self.pipeline = pipeline
+
+    def generate_depth(self, image: Image.Image, resolution: int = 512):
+        image_width, image_height = image.size
+        depth_map = self.pipeline(image)["depth"]
+        depth_map = cast(Image.Image, depth_map)
+
+        new_height = int(image_height * (resolution / image_width))
+        depth_map = depth_map.resize((resolution, new_height))
+        return depth_map
+
+    def calc_size(self) -> int:
+        from invokeai.backend.model_manager.load.model_util import calc_module_size
+
+        return calc_module_size(self.pipeline.model)
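For reference, the wrapper can be exercised on its own. A minimal usage sketch, assuming the Hugging Face "depth-estimation" pipeline task with the "LiheYoung/depth-anything-small-hf" checkpoint and a hypothetical input file (these names are assumptions, not taken from this commit):

from PIL import Image
from transformers import pipeline
from transformers.pipelines import DepthEstimationPipeline

from invokeai.backend.image_util.depth_anything.depth_anything_pipeline import DepthAnythingPipeline

# Build a transformers depth-estimation pipeline (model id is an assumption, not part of this commit).
hf_pipeline = pipeline("depth-estimation", model="LiheYoung/depth-anything-small-hf")
assert isinstance(hf_pipeline, DepthEstimationPipeline)

# Wrap it so it can be sized and cached by Invoke's Model Management System.
depth_anything = DepthAnythingPipeline(hf_pipeline)

image = Image.open("portrait.png")  # hypothetical input image
depth_map = depth_anything.generate_depth(image, resolution=1024)
depth_map.save("portrait_depth.png")

print(f"Approximate model size in bytes: {depth_anything.calc_size()}")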

invokeai/backend/model_manager/config.py

@@ -31,13 +31,16 @@ from pydantic import BaseModel, ConfigDict, Discriminator, Field, Tag, TypeAdapter
 from typing_extensions import Annotated, Any, Dict
 
 from invokeai.app.util.misc import uuid_string
+from invokeai.backend.image_util.depth_anything.depth_anything_pipeline import DepthAnythingPipeline
 from invokeai.backend.model_hash.hash_validator import validate_hash
 from invokeai.backend.raw_model import RawModel
 from invokeai.backend.stable_diffusion.schedulers.schedulers import SCHEDULER_NAME_VALUES
 
 # ModelMixin is the base class for all diffusers and transformers models
 # RawModel is the InvokeAI wrapper class for ip_adapters, loras, textual_inversion and onnx runtime
-AnyModel = Union[ModelMixin, RawModel, torch.nn.Module, Dict[str, torch.Tensor], diffusers.DiffusionPipeline]
+AnyModel = Union[
+    ModelMixin, RawModel, torch.nn.Module, Dict[str, torch.Tensor], diffusers.DiffusionPipeline, DepthAnythingPipeline
+]
 
 
 class InvalidModelConfigException(Exception):
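Because AnyModel is a plain Union, code that receives a model from the manager still narrows the type with isinstance before using it. A short sketch of that pattern (get_depth_map is a hypothetical helper, not part of this commit):

from PIL import Image

from invokeai.backend.image_util.depth_anything.depth_anything_pipeline import DepthAnythingPipeline
from invokeai.backend.model_manager.config import AnyModel


def get_depth_map(model: AnyModel, image: Image.Image) -> Image.Image:
    # Hypothetical helper: accept anything the model manager can hand out,
    # but only the DepthAnything wrapper knows how to produce a depth map.
    if not isinstance(model, DepthAnythingPipeline):
        raise TypeError(f"Expected DepthAnythingPipeline, got {type(model).__name__}")
    return model.generate_depth(image, resolution=768)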

invokeai/backend/model_manager/load/model_util.py

@@ -11,6 +11,7 @@ from diffusers.pipelines.pipeline_utils import DiffusionPipeline
 from diffusers.schedulers.scheduling_utils import SchedulerMixin
 from transformers import CLIPTokenizer
 
+from invokeai.backend.image_util.depth_anything.depth_anything_pipeline import DepthAnythingPipeline
 from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
 from invokeai.backend.lora import LoRAModelRaw
 from invokeai.backend.model_manager.config import AnyModel
@@ -34,7 +35,9 @@ def calc_model_size_by_data(logger: logging.Logger, model: AnyModel) -> int:
     elif isinstance(model, CLIPTokenizer):
         # TODO(ryand): Accurately calculate the tokenizer's size. It's small enough that it shouldn't matter for now.
         return 0
-    elif isinstance(model, (TextualInversionModelRaw, IPAdapter, LoRAModelRaw, SpandrelImageToImageModel)):
+    elif isinstance(
+        model, (TextualInversionModelRaw, IPAdapter, LoRAModelRaw, SpandrelImageToImageModel, DepthAnythingPipeline)
+    ):
         return model.calc_size()
     else:
         # TODO(ryand): Promote this from a log to an exception once we are confident that we are handling all of the
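calc_module_size itself is not shown in this diff. As a rough, non-authoritative sketch of what such a size estimate usually amounts to, it sums the storage held by a module's parameters and buffers (approximate_module_size is a stand-in name, not Invoke's implementation):

import torch


def approximate_module_size(module: torch.nn.Module) -> int:
    # Stand-in for calc_module_size: estimate the bytes held by parameters and buffers.
    size = 0
    for param in module.parameters():
        size += param.nelement() * param.element_size()
    for buffer in module.buffers():
        size += buffer.nelement() * buffer.element_size()
    return size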