# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import torch
|
||
|
import torch.nn as nn
|
||
|
from torchvision.transforms import Normalize, Resize, ToTensor
|
||
|
|
||
|
|
||
|
class SAM2Transforms(nn.Module):
    """Image pre-processing and prompt-coordinate helpers for SAM2.

    Images are converted to tensors, resized to a square of side
    ``resolution`` and normalized with the per-channel ``mean`` / ``std``
    stored on the instance. Point and box prompts are rescaled into that
    ``resolution`` x ``resolution`` pixel frame.
    """

    def __init__(
        self, resolution, mask_threshold, max_hole_area=0.0, max_sprinkle_area=0.0
    ):
        """
        Transforms for SAM2.

        Args:
            resolution: Side length of the square image produced by the
                resize step; also the factor applied to normalized
                coordinates in ``transform_coords``.
            mask_threshold: Stored on the instance. Not read by any method
                of this class as written here.
            max_hole_area: Stored on the instance. Not read by any method
                of this class as written here.
            max_sprinkle_area: Stored on the instance. Not read by any
                method of this class as written here.
        """
        super().__init__()
        self.resolution = resolution
        self.mask_threshold = mask_threshold
        self.max_hole_area = max_hole_area
        self.max_sprinkle_area = max_sprinkle_area
        # Per-channel statistics handed to Normalize below.
        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]
        self.to_tensor = ToTensor()
        # Resize + Normalize are scripted together as one module; ToTensor is
        # kept outside the scripted pipeline and applied first by the callers.
        self.transforms = torch.jit.script(
            nn.Sequential(
                Resize((self.resolution, self.resolution)),
                Normalize(self.mean, self.std),
            )
        )

    def __call__(self, x):
        """Convert a single image to a tensor, then resize and normalize it.

        NOTE: this overrides ``nn.Module.__call__`` directly instead of
        defining ``forward``, so the usual module hook machinery is not
        involved when an instance is called.
        """
        x = self.to_tensor(x)
        return self.transforms(x)

    def forward_batch(self, img_list):
        """Transform every image in ``img_list`` and stack along a new dim 0.

        Each image goes through the same ``to_tensor`` -> ``transforms``
        pipeline as ``__call__``. ``torch.stack`` requires a non-empty list.
        """
        img_batch = [self.transforms(self.to_tensor(img)) for img in img_list]
        img_batch = torch.stack(img_batch, dim=0)
        return img_batch

    def transform_coords(
        self, coords: torch.Tensor, normalize=False, orig_hw=None
    ) -> torch.Tensor:
        """
        Rescale point coordinates into the ``resolution`` x ``resolution`` frame.

        Expects a torch tensor with length 2 in the last dimension, ordered
        (x, y). The coordinates can be in absolute image or normalized
        coordinates. If the coords are in absolute image coordinates,
        normalize should be set to True and the original image size is
        required.

        Args:
            coords: Tensor of shape (..., 2). Never modified in place.
            normalize: When True, x is first divided by the original width
                and y by the original height.
            orig_hw: ``(height, width)`` of the original image. Required
                when ``normalize`` is True, ignored otherwise.

        Returns
            A new tensor with the coordinates multiplied by
            ``self.resolution``, i.e. expressed in pixels of the resized
            model input (not in the [0, 1] range).

        Raises:
            ValueError: If ``normalize`` is True and ``orig_hw`` is None.
        """
        if normalize:
            # Explicit check instead of `assert`, which is stripped under -O.
            if orig_hw is None:
                raise ValueError("orig_hw is required when normalize=True")
            h, w = orig_hw
            if coords.is_floating_point():
                coords = coords.clone()
            else:
                # Integer pixel coordinates must be promoted before the
                # in-place writes below; otherwise the fractional quotients
                # are cast back to the integer dtype and truncated.
                coords = coords.to(torch.get_default_dtype())
            coords[..., 0] = coords[..., 0] / w
            coords[..., 1] = coords[..., 1] / h

        coords = coords * self.resolution  # unnormalize coords
        return coords

    def transform_boxes(
        self, boxes: torch.Tensor, normalize=False, orig_hw=None
    ) -> torch.Tensor:
        """
        Rescale boxes by treating each one as two (x, y) corner points.

        Expects a tensor of shape Bx4. The coordinates can be in absolute
        image or normalized coordinates. If the coords are in absolute image
        coordinates, normalize should be set to True and the original image
        size is required.

        Returns:
            Tensor of shape (B, 2, 2): the input reshaped to corner points
            and passed through ``transform_coords``.

        Raises:
            ValueError: If ``normalize`` is True and ``orig_hw`` is None.
        """
        boxes = self.transform_coords(boxes.reshape(-1, 2, 2), normalize, orig_hw)
        return boxes

    def postprocess_masks(self, masks: torch.Tensor, orig_hw) -> torch.Tensor:
        """
        Perform PostProcessing on output masks.

        As written here this is a pass-through: ``masks`` is returned
        unchanged and ``orig_hw`` is not used.
        """
        return masks
|