Mirror of https://github.com/invoke-ai/InvokeAI, synced 2024-08-30 20:32:17 +00:00
Move invokeai/backend/grounded_sam -> invokeai/backend/image_util/grounded_sam
invokeai/backend/image_util/grounded_sam/detection_result.py (new normal file, 30 lines)
@@ -0,0 +1,30 @@
from typing import Any, Optional

import numpy.typing as npt
from pydantic import BaseModel, ConfigDict


class BoundingBox(BaseModel):
    """Bounding box helper class."""

    xmin: int
    ymin: int
    xmax: int
    ymax: int

    def to_box(self) -> list[int]:
        """Convert to the array notation expected by SAM."""
        return [self.xmin, self.ymin, self.xmax, self.ymax]


class DetectionResult(BaseModel):
    """Detection result from Grounding DINO or Grounded SAM."""

    score: float
    label: str
    box: BoundingBox
    mask: Optional[npt.NDArray[Any]] = None

    model_config = ConfigDict(
        # Allow arbitrary types for mask, since it will be a numpy array.
        arbitrary_types_allowed=True
    )
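A minimal sketch, not part of the diff, of how a raw result dict from a zero-shot object detection pipeline maps onto these models; the sample values are invented for illustration.

# Sketch only: sample values are made up, not taken from the diff.
from invokeai.backend.image_util.grounded_sam.detection_result import DetectionResult

raw_result = {
    "score": 0.92,
    "label": "a cat",
    "box": {"xmin": 10, "ymin": 20, "xmax": 110, "ymax": 220},
}
detection = DetectionResult.model_validate(raw_result)
print(detection.box.to_box())  # [10, 20, 110, 220]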
@ -0,0 +1,36 @@
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers.pipelines import ZeroShotObjectDetectionPipeline
|
||||
|
||||
from invokeai.backend.image_util.grounded_sam.detection_result import DetectionResult
|
||||
|
||||
|
||||
class GroundingDinoPipeline:
|
||||
"""A wrapper class for a ZeroShotObjectDetectionPipeline that makes it compatible with the model manager's memory
|
||||
management system.
|
||||
"""
|
||||
|
||||
def __init__(self, pipeline: ZeroShotObjectDetectionPipeline):
|
||||
self._pipeline = pipeline
|
||||
|
||||
def detect(self, image: Image.Image, candidate_labels: list[str], threshold: float = 0.1) -> list[DetectionResult]:
|
||||
results = self._pipeline(image=image, candidate_labels=candidate_labels, threshold=threshold)
|
||||
results = [DetectionResult.model_validate(result) for result in results]
|
||||
return results
|
||||
|
||||
def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> "GroundingDinoPipeline":
|
||||
# HACK(ryand): The GroundingDinoPipeline does not work on MPS devices. We only allow it to be moved to CPU or
|
||||
# CUDA.
|
||||
if device is not None and device.type not in {"cpu", "cuda"}:
|
||||
device = None
|
||||
self._pipeline.model.to(device=device, dtype=dtype)
|
||||
self._pipeline.device = self._pipeline.model.device
|
||||
return self
|
||||
|
||||
def calc_size(self) -> int:
|
||||
# HACK(ryand): Fix the circular import issue.
|
||||
from invokeai.backend.model_manager.load.model_util import calc_module_size
|
||||
|
||||
return calc_module_size(self._pipeline.model)
|
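A hedged usage sketch of this wrapper outside the model manager, not part of the diff. The import path for GroundingDinoPipeline (its file name is not captured above) and the "IDEA-Research/grounding-dino-tiny" checkpoint are assumptions.

# Sketch only. The import path and checkpoint name below are assumptions.
from PIL import Image
from transformers import pipeline
from transformers.pipelines import ZeroShotObjectDetectionPipeline

from invokeai.backend.image_util.grounded_sam.grounding_dino_pipeline import GroundingDinoPipeline  # assumed path

raw_pipeline = pipeline(task="zero-shot-object-detection", model="IDEA-Research/grounding-dino-tiny")
assert isinstance(raw_pipeline, ZeroShotObjectDetectionPipeline)

detector = GroundingDinoPipeline(raw_pipeline)
image = Image.open("example.png").convert("RGB")
detections = detector.detect(image=image, candidate_labels=["a cat"], threshold=0.3)
for detection in detections:
    print(detection.label, detection.score, detection.box.to_box())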
invokeai/backend/image_util/grounded_sam/mask_refinement.py (new normal file, 50 lines)
@@ -0,0 +1,50 @@
# This file contains utilities for Grounded-SAM mask refinement based on:
# https://github.com/NielsRogge/Transformers-Tutorials/blob/a39f33ac1557b02ebfb191ea7753e332b5ca933f/Grounding%20DINO/GroundingDINO_with_Segment_Anything.ipynb


import cv2
import numpy as np
import numpy.typing as npt


def mask_to_polygon(mask: npt.NDArray[np.uint8]) -> list[tuple[int, int]]:
    """Convert a binary mask to a polygon.

    Returns:
        list[list[int]]: List of (x, y) coordinates representing the vertices of the polygon.
    """
    # Find contours in the binary mask.
    contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Find the contour with the largest area.
    largest_contour = max(contours, key=cv2.contourArea)

    # Extract the vertices of the contour.
    polygon = largest_contour.reshape(-1, 2).tolist()

    return polygon


def polygon_to_mask(
    polygon: list[tuple[int, int]], image_shape: tuple[int, int], fill_value: int = 1
) -> npt.NDArray[np.uint8]:
    """Convert a polygon to a segmentation mask.

    Args:
        polygon (list): List of (x, y) coordinates representing the vertices of the polygon.
        image_shape (tuple): Shape of the image (height, width) for the mask.
        fill_value (int): Value to fill the polygon with.

    Returns:
        np.ndarray: Segmentation mask with the polygon filled (with value 255).
    """
    # Create an empty mask.
    mask = np.zeros(image_shape, dtype=np.uint8)

    # Convert polygon to an array of points.
    pts = np.array(polygon, dtype=np.int32)

    # Fill the polygon with white color (255).
    cv2.fillPoly(mask, [pts], color=(fill_value,))

    return mask
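A minimal sketch of the intended refinement flow, which is an assumption based on the two functions above rather than code from this diff: round-tripping a rough binary mask through its largest-contour polygon keeps only the dominant region and drops small speckles.

# Sketch only: the input mask here is synthetic, for illustration.
import numpy as np

from invokeai.backend.image_util.grounded_sam.mask_refinement import mask_to_polygon, polygon_to_mask

rough_mask = np.zeros((64, 64), dtype=np.uint8)
rough_mask[8:32, 8:32] = 1    # main region, kept
rough_mask[50:53, 50:53] = 1  # small speckle, discarded by the round trip

polygon = mask_to_polygon(rough_mask)
refined_mask = polygon_to_mask(polygon, image_shape=rough_mask.shape, fill_value=255)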
@@ -0,0 +1,49 @@
from typing import Optional

import torch
from PIL import Image
from transformers.models.sam import SamModel
from transformers.models.sam.processing_sam import SamProcessor

from invokeai.backend.image_util.grounded_sam.detection_result import DetectionResult


class SegmentAnythingModel:
    """A wrapper class for the transformers SAM model and processor that makes it compatible with the model manager."""

    def __init__(self, sam_model: SamModel, sam_processor: SamProcessor):
        self._sam_model = sam_model
        self._sam_processor = sam_processor

    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> "SegmentAnythingModel":
        # HACK(ryand): The SAM pipeline does not work on MPS devices. We only allow it to be moved to CPU or CUDA.
        if device is not None and device.type not in {"cpu", "cuda"}:
            device = None
        self._sam_model.to(device=device, dtype=dtype)
        return self

    def calc_size(self) -> int:
        # HACK(ryand): Fix the circular import issue.
        from invokeai.backend.model_manager.load.model_util import calc_module_size

        return calc_module_size(self._sam_model)

    def segment(self, image: Image.Image, detection_results: list[DetectionResult]) -> torch.Tensor:
        boxes = self._to_box_array(detection_results)
        inputs = self._sam_processor(images=image, input_boxes=boxes, return_tensors="pt").to(self._sam_model.device)
        outputs = self._sam_model(**inputs)
        masks = self._sam_processor.post_process_masks(
            masks=outputs.pred_masks,
            original_sizes=inputs.original_sizes,
            reshaped_input_sizes=inputs.reshaped_input_sizes,
        )

        # There should be only one batch.
        assert len(masks) == 1
        masks = masks[0]
        return masks

    def _to_box_array(self, detection_results: list[DetectionResult]) -> list[list[list[int]]]:
        """Convert a list of DetectionResults to the bbox format expected by the Segment Anything model."""
        boxes = [result.box.to_box() for result in detection_results]
        return [boxes]
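A hedged end-to-end sketch, not part of the diff, showing how the two wrappers could fit together. Assumptions: the import path of SegmentAnythingModel (its file name is not captured above), the "facebook/sam-vit-base" checkpoint, and reuse of the `detector` and `image` from the GroundingDinoPipeline sketch earlier.

# Sketch only. Import path and checkpoint are assumptions; `detector` and
# `image` are reused from the GroundingDinoPipeline sketch above.
from transformers.models.sam import SamModel
from transformers.models.sam.processing_sam import SamProcessor

from invokeai.backend.image_util.grounded_sam.segment_anything_model import SegmentAnythingModel  # assumed path

sam = SegmentAnythingModel(
    sam_model=SamModel.from_pretrained("facebook/sam-vit-base"),
    sam_processor=SamProcessor.from_pretrained("facebook/sam-vit-base"),
)

detections = detector.detect(image=image, candidate_labels=["a cat"], threshold=0.3)
masks = sam.segment(image=image, detection_results=detections)
# One entry per input box; each box gets several candidate masks from SAM.
first_mask = masks[0, 0].cpu().numpy().astype("uint8")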