cleanup: Remove manual offload from Depth Anything Processor (#5812)

## What type of PR is this? (check all applicable)

- [ ] Refactor
- [ ] Feature
- [ ] Bug Fix
- [ ] Optimization
- [ ] Documentation Update
- [ ] Community Node Submission

## Have you discussed this change with the InvokeAI team?

- [ ] Yes
- [ ] No, because:

## Have you updated all relevant documentation?

- [ ] Yes
- [ ] No

## Description

## Related Tickets & Documents

- Related Issue #
- Closes #

## QA Instructions, Screenshots, Recordings

## Merge Plan

## Added/updated tests?

- [ ] Yes
- [ ] No : _please replace this line with details on why tests have not been included_

## [optional] Are there any post deployment tasks we need to perform?
2024-08-30 20:32:17 +00:00 · 2024-03-01 23:13:06 +05:30 · 2024-03-01 23:13:06 +05:30 · 80fd3d3f3c
commit 80fd3d3f3c
parent 7cfbe5a62a 41b77cd5ff
2 changed files with 16 additions and 15 deletions
--- a/invokeai/app/invocations/controlnet_image_processors.py
+++ b/invokeai/app/invocations/controlnet_image_processors.py
@@ -576,7 +576,7 @@ DEPTH_ANYTHING_MODEL_SIZES = Literal["large", "base", "small"]
    title="Depth Anything Processor",
    tags=["controlnet", "depth", "depth anything"],
    category="controlnet",
-    version="1.0.0",
+    version="1.0.1",
 )
 class DepthAnythingImageProcessorInvocation(ImageProcessorInvocation):
    """Generates a depth map based on the Depth Anything algorithm"""
@@ -585,13 +585,12 @@ class DepthAnythingImageProcessorInvocation(ImageProcessorInvocation):
        default="small", description="The size of the depth model to use"
    )
    resolution: int = InputField(default=512, ge=64, multiple_of=64, description=FieldDescriptions.image_res)
-    offload: bool = InputField(default=False)

    def run_processor(self, image: Image.Image):
        depth_anything_detector = DepthAnythingDetector()
        depth_anything_detector.load_model(model_size=self.model_size)

-        processed_image = depth_anything_detector(image=image, resolution=self.resolution, offload=self.offload)
+        processed_image = depth_anything_detector(image=image, resolution=self.resolution)
        return processed_image


--- a/invokeai/backend/image_util/depth_anything/__init__.py
+++ b/invokeai/backend/image_util/depth_anything/__init__.py
@@ -17,6 +17,8 @@ from invokeai.backend.util.util import download_with_progress_bar

 config = InvokeAIAppConfig.get_config()

+DEPTH_ANYTHING_MODEL_SIZES = Literal["large", "base", "small"]
+
 DEPTH_ANYTHING_MODELS = {
    "large": {
        "url": "https://huggingface.co/spaces/LiheYoung/Depth-Anything/resolve/main/checkpoints/depth_anything_vitl14.pth?download=true",
@@ -53,9 +55,9 @@ transform = Compose(
 class DepthAnythingDetector:
    def __init__(self) -> None:
        self.model = None
-        self.model_size: Union[Literal["large", "base", "small"], None] = None
+        self.model_size: Union[DEPTH_ANYTHING_MODEL_SIZES, None] = None

-    def load_model(self, model_size=Literal["large", "base", "small"]):
+    def load_model(self, model_size: DEPTH_ANYTHING_MODEL_SIZES = "small"):
        DEPTH_ANYTHING_MODEL_PATH = pathlib.Path(config.models_path / DEPTH_ANYTHING_MODELS[model_size]["local"])
        if not DEPTH_ANYTHING_MODEL_PATH.exists():
            download_with_progress_bar(DEPTH_ANYTHING_MODELS[model_size]["url"], DEPTH_ANYTHING_MODEL_PATH)
@@ -84,16 +86,19 @@ class DepthAnythingDetector:
        self.model.to(device)
        return self

-    def __call__(self, image, resolution=512, offload=False):
-        image = np.array(image, dtype=np.uint8)
-        image = image[:, :, ::-1] / 255.0
+    def __call__(self, image: Image.Image, resolution: int = 512):
+        if self.model is None:
+            raise Exception("Depth Anything Model not loaded")

-        image_height, image_width = image.shape[:2]
-        image = transform({"image": image})["image"]
-        image = torch.from_numpy(image).unsqueeze(0).to(choose_torch_device())
+        np_image = np.array(image, dtype=np.uint8)
+        np_image = np_image[:, :, ::-1] / 255.0
+
+        image_height, image_width = np_image.shape[:2]
+        np_image = transform({"image": image})["image"]
+        tensor_image = torch.from_numpy(np_image).unsqueeze(0).to(choose_torch_device())

        with torch.no_grad():
-            depth = self.model(image)
+            depth = self.model(tensor_image)
            depth = F.interpolate(depth[None], (image_height, image_width), mode="bilinear", align_corners=False)[0, 0]
            depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0

@@ -103,7 +108,4 @@ class DepthAnythingDetector:
        new_height = int(image_height * (resolution / image_width))
        depth_map = depth_map.resize((resolution, new_height))

-        if offload:
-            del self.model
-
        return depth_map