# IOPaint/iopaint/plugins/interactive_seg.py (108 lines, 3.6 KiB, Python)
2024-01-02 04:07:35 +01:00
import hashlib
from typing import List
2022-11-27 14:25:27 +01:00
2023-03-22 05:57:18 +01:00
import numpy as np
import torch
2022-11-27 14:25:27 +01:00
from loguru import logger
2024-01-05 08:19:23 +01:00
from iopaint.helper import download_model
from iopaint.plugins.base_plugin import BasePlugin
from iopaint.plugins.segment_anything import SamPredictor, sam_model_registry
2024-02-20 02:03:11 +01:00
from iopaint.plugins.segment_anything.predictor_hq import SamHQPredictor
2024-01-05 08:19:23 +01:00
from iopaint.schema import RunPluginRequest
2023-04-06 15:55:20 +02:00
# Download metadata for every supported Segment Anything checkpoint, keyed by
# the model name used to select it. Each entry carries the checkpoint "url"
# and the "md5" that is passed alongside it to the download helper.
# Original note: listed from smallest to largest (this appears to hold within
# each model family rather than across the whole table).
SEGMENT_ANYTHING_MODELS = {
    # Segment Anything ViT checkpoints.
    "vit_b": {
        "url": "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth",
        "md5": "01ec64d29a2fca3f0661936605ae66f8",
    },
    "vit_l": {
        "url": "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_l_0b3195.pth",
        "md5": "0b3195507c641ddb6910d2bb5adee89c",
    },
    "vit_h": {
        "url": "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth",
        "md5": "4b8939a88964f0f4ff5f5b2642c598a6",
    },
    # MobileSAM checkpoint.
    "mobile_sam": {
        "url": "https://github.com/Sanster/models/releases/download/MobileSAM/mobile_sam.pt",
        "md5": "f3c0d8cda613564d499310dab6c812cd",
    },
    # SAM-HQ checkpoints; names containing "sam_hq" select the HQ predictor.
    "sam_hq_vit_b": {
        "url": "https://huggingface.co/lkeab/hq-sam/resolve/main/sam_hq_vit_b.pth",
        "md5": "c6b8953247bcfdc8bb8ef91e36a6cacc",
    },
    "sam_hq_vit_l": {
        "url": "https://huggingface.co/lkeab/hq-sam/resolve/main/sam_hq_vit_l.pth",
        "md5": "08947267966e4264fb39523eccc33f86",
    },
    "sam_hq_vit_h": {
        "url": "https://huggingface.co/lkeab/hq-sam/resolve/main/sam_hq_vit_h.pth",
        "md5": "3560f6b6a5a6edacd814a1325c39640a",
    },
}
2022-11-27 14:25:27 +01:00
2023-03-26 06:37:58 +02:00
class InteractiveSeg(BasePlugin):
    """Plugin that produces a segmentation mask from user clicks.

    A Segment Anything style predictor is built for the configured model and
    device. The image most recently handed to the predictor is tracked by an
    MD5 digest so that repeated requests on the same image skip the
    ``set_image`` call.
    """

    name = "InteractiveSeg"
    support_gen_mask = True

    def __init__(self, model_name, device):
        """Store the configuration and build the predictor.

        Args:
            model_name: Key into ``SEGMENT_ANYTHING_MODELS`` and
                ``sam_model_registry``.
            device: Target passed to ``.to()`` when the model is created.
        """
        super().__init__()
        self.model_name = model_name
        self.device = device
        self._init_session(model_name)

    def _init_session(self, model_name: str):
        """Download the checkpoint for ``model_name`` and create the predictor.

        Also clears the cached image digest, because a freshly created
        predictor has not been given any image yet.

        Raises:
            KeyError: If ``model_name`` is not in ``SEGMENT_ANYTHING_MODELS``.
        """
        model_info = SEGMENT_ANYTHING_MODELS[model_name]
        model_path = download_model(model_info["url"], model_info["md5"])
        logger.info(f"SegmentAnything model path: {model_path}")

        # Names containing "sam_hq" use the HQ predictor; everything else
        # uses the standard one. Model construction is the same for both.
        predictor_cls = SamHQPredictor if "sam_hq" in model_name else SamPredictor
        sam_model = sam_model_registry[model_name](checkpoint=model_path)
        self.predictor = predictor_cls(sam_model.to(self.device))
        self.prev_img_md5 = None

    def switch_model(self, new_model_name):
        """Replace the predictor with one for ``new_model_name``.

        Does nothing when the requested model is already active.
        ``self.model_name`` is updated only after the new predictor has been
        created, so a failure while loading leaves the old name in place.
        """
        if self.model_name == new_model_name:
            return

        logger.info(
            f"Switching InteractiveSeg model from {self.model_name} to {new_model_name}"
        )
        self._init_session(new_model_name)
        self.model_name = new_model_name

    def gen_mask(self, rgb_np_img, req: RunPluginRequest) -> np.ndarray:
        """Generate a mask for ``rgb_np_img`` from the clicks in ``req``.

        The MD5 of ``req.image`` (a string, UTF-8 encoded before hashing) is
        used as the identity of the image for predictor caching.
        """
        img_md5 = hashlib.md5(req.image.encode("utf-8")).hexdigest()
        return self.forward(rgb_np_img, req.clicks, img_md5)

    @torch.inference_mode()
    def forward(self, rgb_np_img, clicks: List[List], img_md5: str):
        """Run the predictor on the given clicks and return a single mask.

        Args:
            rgb_np_img: Image handed to ``predictor.set_image`` (presumably
                an RGB array, going by the name; confirm against callers).
            clicks: Sequence of clicks; each is indexed as
                ``[x, y, label]``.
            img_md5: Digest identifying the image. When it is falsy the
                predictor's current image is reused without calling
                ``set_image``.

        Returns:
            The first predicted mask cast to ``uint8`` and multiplied by 255.
        """
        input_point = [[click[0], click[1]] for click in clicks]
        input_label = [click[2] for click in clicks]

        if img_md5 and img_md5 != self.prev_img_md5:
            # Invalidate the cache before touching the predictor and record
            # the new digest only once set_image has succeeded. Otherwise a
            # failed set_image would leave a digest that makes later calls
            # skip set_image and predict against a missing or stale image.
            self.prev_img_md5 = None
            self.predictor.set_image(rgb_np_img)
            self.prev_img_md5 = img_md5

        masks, _, _ = self.predictor.predict(
            point_coords=np.array(input_point),
            point_labels=np.array(input_label),
            multimask_output=False,
        )
        return masks[0].astype(np.uint8) * 255