IOPaint/inpaint/plugins/facexlib/detection/retinaface.py

import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from torchvision.models._utils import IntermediateLayerGetter as IntermediateLayerGetter

from .align_trans import get_reference_facial_points, warp_and_crop_face
from .retinaface_net import (
    FPN,
    SSH,
    MobileNetV1,
    make_bbox_head,
    make_class_head,
    make_landmark_head,
)
from .retinaface_utils import (
    PriorBox,
    batched_decode,
    batched_decode_landm,
    decode,
    decode_landm,
    py_cpu_nms,
)


def generate_config(network_name):
    """Return a freshly built configuration dict for a RetinaFace variant.

    Args:
        network_name: ``"mobile0.25"`` selects the MobileNet-0.25 settings,
            ``"resnet50"`` selects the ResNet-50 settings.

    Returns:
        A new ``dict`` holding the prior-box settings (``min_sizes``,
        ``steps``, ``variance``, ``clip``), training hyper-parameters and the
        backbone wiring (``return_layers``, ``in_channel``, ``out_channel``).

    Raises:
        NotImplementedError: if ``network_name`` is not one of the two
            supported values.
    """
    # Settings that are identical for both backbones.
    shared = {
        "min_sizes": [[16, 32], [64, 128], [256, 512]],
        "steps": [8, 16, 32],
        "variance": [0.1, 0.2],
        "clip": False,
        "loc_weight": 2.0,
        "gpu_train": True,
    }

    if network_name == "mobile0.25":
        return {
            "name": "mobilenet0.25",
            **shared,
            "batch_size": 32,
            "ngpu": 1,
            "epoch": 250,
            "decay1": 190,
            "decay2": 220,
            "image_size": 640,
            "return_layers": {"stage1": 1, "stage2": 2, "stage3": 3},
            "in_channel": 32,
            "out_channel": 64,
        }

    if network_name == "resnet50":
        return {
            "name": "Resnet50",
            **shared,
            "batch_size": 24,
            "ngpu": 4,
            "epoch": 100,
            "decay1": 70,
            "decay2": 90,
            "image_size": 840,
            "return_layers": {"layer2": 1, "layer3": 2, "layer4": 3},
            "in_channel": 256,
            "out_channel": 256,
        }

    raise NotImplementedError(f"network_name={network_name}")


class RetinaFace(nn.Module):
    """RetinaFace detector: backbone + FPN + SSH + class/bbox/landmark heads.

    Besides the network itself, the module bundles input pre-processing
    (``transform`` / ``batched_transform``), decoding plus NMS
    (``detect_faces`` / ``batched_detect_faces``) and face alignment
    (``align_multi``).

    Note:
        ``self.resize``, ``self.scale`` and ``self.scale1`` are overwritten on
        every detection call, so results of concurrent calls on one instance
        may interfere with each other.
    """

    def __init__(self, network_name="resnet50", half=False, phase="test", device=None):
        """Build the network, move it to ``device`` and switch to eval mode.

        Args:
            network_name: key understood by ``generate_config``
                (``"mobile0.25"`` or ``"resnet50"``).
            half: when true, the module is converted with ``self.half()`` and
                inputs are cast to half precision before the forward pass.
            phase: ``"train"`` makes ``forward`` return the raw class scores;
                any other value applies a softmax to them.
            device: device to run on; defaults to CUDA when available,
                otherwise CPU.

        Raises:
            NotImplementedError: propagated from ``generate_config`` for an
                unknown ``network_name``.
        """
        # nn.Module must be initialised before attributes are assigned to it.
        super().__init__()
        self.device = (
            torch.device("cuda" if torch.cuda.is_available() else "cpu")
            if device is None
            else device
        )

        self.half_inference = half
        cfg = generate_config(network_name)
        self.backbone = cfg["name"]

        self.model_name = f"retinaface_{network_name}"
        self.cfg = cfg
        self.phase = phase
        # Shorter-side target and longer-side cap used when rescaling inputs.
        self.target_size, self.max_size = 1600, 2150
        # Per-call state, overwritten by the detection methods.
        self.resize, self.scale, self.scale1 = 1.0, None, None
        # Per-channel offsets subtracted from every input batch (shape 1x3x1x1).
        self.mean_tensor = torch.tensor(
            [[[[104.0]], [[117.0]], [[123.0]]]], device=self.device
        )
        self.reference = get_reference_facial_points(default_square=True)

        # Build network.
        if cfg["name"] == "mobilenet0.25":
            backbone = MobileNetV1()
            self.body = IntermediateLayerGetter(backbone, cfg["return_layers"])
        elif cfg["name"] == "Resnet50":
            import torchvision.models as models

            backbone = models.resnet50(pretrained=False)
            self.body = IntermediateLayerGetter(backbone, cfg["return_layers"])

        in_channels_stage2 = cfg["in_channel"]
        in_channels_list = [
            in_channels_stage2 * 2,
            in_channels_stage2 * 4,
            in_channels_stage2 * 8,
        ]

        out_channels = cfg["out_channel"]
        self.fpn = FPN(in_channels_list, out_channels)
        self.ssh1 = SSH(out_channels, out_channels)
        self.ssh2 = SSH(out_channels, out_channels)
        self.ssh3 = SSH(out_channels, out_channels)

        self.ClassHead = make_class_head(fpn_num=3, inchannels=out_channels)
        self.BboxHead = make_bbox_head(fpn_num=3, inchannels=out_channels)
        self.LandmarkHead = make_landmark_head(fpn_num=3, inchannels=out_channels)

        self.to(self.device)
        self.eval()
        if self.half_inference:
            self.half()

    def forward(self, inputs):
        """Run backbone, FPN, SSH and the three prediction heads.

        Args:
            inputs: image batch fed to the backbone
                (presumably NCHW, mean-subtracted -- see the detection methods).

        Returns:
            Tuple ``(bbox_regressions, classifications, ldm_regressions)``,
            each the concatenation along dim 1 of the per-level head outputs.
            Unless ``self.phase == "train"``, a softmax over the last
            dimension is applied to ``classifications``.
        """
        out = self.body(inputs)

        if self.backbone == "mobilenet0.25" or self.backbone == "Resnet50":
            # IntermediateLayerGetter returns a mapping; FPN takes a list.
            out = list(out.values())
        # FPN
        fpn = self.fpn(out)

        # SSH
        features = [self.ssh1(fpn[0]), self.ssh2(fpn[1]), self.ssh3(fpn[2])]

        bbox_regressions = torch.cat(
            [self.BboxHead[i](feature) for i, feature in enumerate(features)], dim=1
        )
        classifications = torch.cat(
            [self.ClassHead[i](feature) for i, feature in enumerate(features)], dim=1
        )
        ldm_regressions = torch.cat(
            [self.LandmarkHead[i](feature) for i, feature in enumerate(features)],
            dim=1,
        )

        if self.phase == "train":
            return bbox_regressions, classifications, ldm_regressions
        return (
            bbox_regressions,
            F.softmax(classifications, dim=-1),
            ldm_regressions,
        )

    def __detect_faces(self, inputs):
        """Run the network on a pre-processed batch and build matching priors.

        Side effect: stores ``self.scale`` (``[w, h] * 2``) and
        ``self.scale1`` (``[w, h] * 5``), which the callers multiply with the
        decoded boxes and landmarks.

        Args:
            inputs: tensor whose last two dimensions are height and width.

        Returns:
            Tuple ``(loc, conf, landmarks, priors)``: the three network
            outputs plus the ``PriorBox`` priors for this input size.
        """
        # get scale
        height, width = inputs.shape[2:]
        self.scale = torch.tensor(
            [width, height] * 2, dtype=torch.float32, device=self.device
        )
        self.scale1 = torch.tensor(
            [width, height] * 5, dtype=torch.float32, device=self.device
        )

        # forward
        inputs = inputs.to(self.device)
        if self.half_inference:
            inputs = inputs.half()
        loc, conf, landmarks = self(inputs)

        # get priorbox
        priorbox = PriorBox(self.cfg, image_size=inputs.shape[2:])
        priors = priorbox.forward().to(self.device)

        return loc, conf, landmarks, priors

    # single image detection
    def transform(self, image, use_origin_size):
        """Convert one image into a ``1 x C x H x W`` float32 tensor.

        Args:
            image: a ``PIL.Image`` (converted from RGB to BGR) or an
                ``H x W x C`` numpy array (used as is).
            use_origin_size: when true the image is not rescaled.

        Returns:
            Tuple ``(tensor, resize)`` where ``resize`` is the scale factor
            that was applied (``1`` when ``use_origin_size`` is true).
        """
        # convert to opencv format
        if isinstance(image, Image.Image):
            image = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)
        image = image.astype(np.float32)

        # testing scale: bring the shorter side to target_size ...
        im_size_min = np.min(image.shape[0:2])
        im_size_max = np.max(image.shape[0:2])
        resize = float(self.target_size) / float(im_size_min)

        # ... but prevent bigger axis from being more than max_size
        if np.round(resize * im_size_max) > self.max_size:
            resize = float(self.max_size) / float(im_size_max)
        resize = 1 if use_origin_size else resize

        # resize
        if resize != 1:
            image = cv2.resize(
                image, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR
            )

        # convert to torch.tensor format (mean subtraction happens in the caller)
        image = image.transpose(2, 0, 1)
        image = torch.from_numpy(image).unsqueeze(0)

        return image, resize

    def detect_faces(
        self,
        image,
        conf_threshold=0.8,
        nms_threshold=0.4,
        use_origin_size=True,
    ):
        """Detect faces in a single image.

        Args:
            image: a ``PIL.Image`` or an ``H x W x C`` numpy array
                (see ``transform``).
            conf_threshold: detections scoring at or below this are dropped.
            nms_threshold: threshold passed to ``py_cpu_nms``.
            use_origin_size: when true the image is not rescaled first.

        Returns:
            ``np.ndarray`` with one row per kept detection: 4 box values,
            the score, then 10 landmark values (15 columns).
        """
        image, self.resize = self.transform(image, use_origin_size)
        image = image.to(self.device)
        if self.half_inference:
            image = image.half()
        image = image - self.mean_tensor

        # Inference only: without no_grad the outputs would require grad and
        # the .numpy() conversions below would raise.
        with torch.no_grad():
            loc, conf, landmarks, priors = self.__detect_faces(image)

        boxes = decode(loc.data.squeeze(0), priors.data, self.cfg["variance"])
        # Scale by input width/height and undo the pre-processing resize.
        boxes = boxes * self.scale / self.resize
        boxes = boxes.cpu().numpy()

        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

        landmarks = decode_landm(landmarks.squeeze(0), priors, self.cfg["variance"])
        landmarks = landmarks * self.scale1 / self.resize
        landmarks = landmarks.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > conf_threshold)[0]
        boxes, landmarks, scores = boxes[inds], landmarks[inds], scores[inds]

        # sort by descending score
        order = scores.argsort()[::-1]
        boxes, landmarks, scores = boxes[order], landmarks[order], scores[order]

        # do NMS
        bounding_boxes = np.hstack((boxes, scores[:, np.newaxis])).astype(
            np.float32, copy=False
        )
        keep = py_cpu_nms(bounding_boxes, nms_threshold)
        bounding_boxes, landmarks = bounding_boxes[keep, :], landmarks[keep]
        return np.concatenate((bounding_boxes, landmarks), axis=1)

    def __align_multi(self, image, boxes, landmarks, limit=None):
        """Warp every detected face to a 112x112 crop.

        Args:
            image: the image the detections belong to.
            boxes: per-face box rows (box values plus score).
            landmarks: per-face rows of 10 values read as five (x, y) pairs.
            limit: if truthy, only the first ``limit`` faces are processed.

        Returns:
            ``([], [])`` when ``boxes`` is empty, otherwise a tuple of the
            concatenated ``boxes``/``landmarks`` array and the list of
            warped face crops.
        """
        if len(boxes) < 1:
            return [], []

        if limit:
            boxes = boxes[:limit]
            landmarks = landmarks[:limit]

        faces = []
        for landmark in landmarks:
            facial5points = [[landmark[2 * j], landmark[2 * j + 1]] for j in range(5)]

            warped_face = warp_and_crop_face(
                np.array(image), facial5points, self.reference, crop_size=(112, 112)
            )
            faces.append(warped_face)

        return np.concatenate((boxes, landmarks), axis=1), faces

    def align_multi(self, img, conf_threshold=0.8, limit=None):
        """Detect faces in ``img`` and return them aligned.

        Args:
            img: image accepted by ``detect_faces``.
            conf_threshold: confidence threshold forwarded to ``detect_faces``.
            limit: optional maximum number of faces to align.

        Returns:
            Same as ``__align_multi``: ``(detections, faces)`` or ``([], [])``
            when nothing was detected.
        """
        rlt = self.detect_faces(img, conf_threshold=conf_threshold)
        # Columns 0-4 are box + score, the remaining ones are landmarks.
        boxes, landmarks = rlt[:, 0:5], rlt[:, 5:]

        return self.__align_multi(img, boxes, landmarks, limit)

    # batched detection
    def batched_transform(self, frames, use_origin_size):
        """Convert a batch of equally sized frames into an ``N x C x H x W`` tensor.

        Arguments:
            frames: a list of PIL.Image (RGB, converted to BGR here), or a
                torch.Tensor / np.ndarray of shape [n, h, w, c] (or a list of
                [h, w, c] arrays) that is used as is.
            use_origin_size: whether to use origin size.
        Returns:
            Tuple ``(frames, resize)``: the NCHW tensor and the applied scale
            factor (``1`` when ``use_origin_size`` is true).
        """
        is_array_batch = isinstance(frames, (np.ndarray, torch.Tensor))
        from_pil = not is_array_batch and isinstance(frames[0], Image.Image)

        if from_pil:
            # convert to opencv format
            frames = np.asarray(
                [
                    cv2.cvtColor(np.asarray(frame), cv2.COLOR_RGB2BGR)
                    for frame in frames
                ],
                dtype=np.float32,
            )
        else:
            if not isinstance(frames, torch.Tensor):
                # ndarray batch (or list of arrays): handled as a tensor below.
                frames = torch.from_numpy(np.asarray(frames))
            if not frames.is_floating_point():
                # e.g. uint8 frames; interpolation and mean subtraction need floats
                frames = frames.float()

        # testing scale: bring the shorter side to target_size ...
        height, width = frames.shape[1:3]
        im_size_min, im_size_max = min(height, width), max(height, width)
        resize = float(self.target_size) / float(im_size_min)

        # ... but prevent bigger axis from being more than max_size
        if np.round(resize * im_size_max) > self.max_size:
            resize = float(self.max_size) / float(im_size_max)
        resize = 1 if use_origin_size else resize

        if from_pil:
            if resize != 1:
                # Re-stack after resizing so the result is an ndarray again.
                frames = np.asarray(
                    [
                        cv2.resize(
                            frame,
                            None,
                            None,
                            fx=resize,
                            fy=resize,
                            interpolation=cv2.INTER_LINEAR,
                        )
                        for frame in frames
                    ],
                    dtype=np.float32,
                )
            frames = torch.from_numpy(frames.transpose((0, 3, 1, 2)))
        else:
            # NHWC -> NCHW first: F.interpolate scales the trailing spatial dims.
            frames = frames.permute(0, 3, 1, 2)
            if resize != 1:
                frames = F.interpolate(frames, scale_factor=resize)
            frames = frames.contiguous()

        return frames, resize

    def batched_detect_faces(
        self, frames, conf_threshold=0.8, nms_threshold=0.4, use_origin_size=True
    ):
        """Detect faces in a batch of equally sized frames.

        Arguments:
            frames: a list of PIL.Image, or a torch.Tensor / np.ndarray of
                shape [n, h, w, c] in BGR format (see ``batched_transform``).
            conf_threshold: confidence threshold.
            nms_threshold: nms threshold.
            use_origin_size: whether to use origin size.
        Returns:
            final_bounding_boxes: list of np.array ([n_boxes, 5],
                type=np.float32), one entry per frame.
            final_landmarks: list of np.array ([n_boxes, 10], type=np.float32),
                one entry per frame. Frames without detections yield arrays
                with zero rows.
        """
        frames, self.resize = self.batched_transform(frames, use_origin_size)
        frames = frames.to(self.device)
        frames = frames - self.mean_tensor

        # Inference only: without no_grad the outputs would require grad and
        # the .numpy() conversions below would raise.
        with torch.no_grad():
            b_loc, b_conf, b_landmarks, priors = self.__detect_faces(frames)

        final_bounding_boxes, final_landmarks = [], []

        # decode
        priors = priors.unsqueeze(0)
        b_loc = (
            batched_decode(b_loc, priors, self.cfg["variance"])
            * self.scale
            / self.resize
        )
        b_landmarks = (
            batched_decode_landm(b_landmarks, priors, self.cfg["variance"])
            * self.scale1
            / self.resize
        )
        b_conf = b_conf[:, :, 1]

        # index for selection
        b_indice = b_conf > conf_threshold

        # concat
        b_loc_and_conf = torch.cat((b_loc, b_conf.unsqueeze(-1)), dim=2).float()

        for pred, landm, inds in zip(b_loc_and_conf, b_landmarks, b_indice):
            # ignore low scores
            pred, landm = pred[inds, :], landm[inds, :]
            if pred.shape[0] == 0:
                # Keep the documented column counts even when nothing was found.
                final_bounding_boxes.append(np.empty((0, 5), dtype=np.float32))
                final_landmarks.append(np.empty((0, 10), dtype=np.float32))
                continue

            # to CPU
            bounding_boxes, landm = pred.cpu().numpy(), landm.cpu().numpy()

            # NMS
            keep = py_cpu_nms(bounding_boxes, nms_threshold)
            bounding_boxes, landmarks = bounding_boxes[keep, :], landm[keep]

            # append
            final_bounding_boxes.append(bounding_boxes)
            final_landmarks.append(landmarks)

        return final_bounding_boxes, final_landmarks
new file: inpaint/__init__.py new file: inpaint/__main__.py new file: inpaint/api.py new file: inpaint/batch_processing.py new file: inpaint/benchmark.py new file: inpaint/cli.py new file: inpaint/const.py new file: inpaint/download.py new file: inpaint/file_manager/__init__.py new file: inpaint/file_manager/file_manager.py new file: inpaint/file_manager/storage_backends.py new file: inpaint/file_manager/utils.py new file: inpaint/helper.py new file: inpaint/installer.py new file: inpaint/model/__init__.py new file: inpaint/model/anytext/__init__.py new file: inpaint/model/anytext/anytext_model.py new file: inpaint/model/anytext/anytext_pipeline.py new file: inpaint/model/anytext/anytext_sd15.yaml new file: inpaint/model/anytext/cldm/__init__.py new file: inpaint/model/anytext/cldm/cldm.py new file: inpaint/model/anytext/cldm/ddim_hacked.py new file: inpaint/model/anytext/cldm/embedding_manager.py new file: inpaint/model/anytext/cldm/hack.py new file: inpaint/model/anytext/cldm/model.py new file: inpaint/model/anytext/cldm/recognizer.py new file: inpaint/model/anytext/ldm/__init__.py new file: inpaint/model/anytext/ldm/models/__init__.py new file: inpaint/model/anytext/ldm/models/autoencoder.py new file: inpaint/model/anytext/ldm/models/diffusion/__init__.py new file: inpaint/model/anytext/ldm/models/diffusion/ddim.py new file: inpaint/model/anytext/ldm/models/diffusion/ddpm.py new file: inpaint/model/anytext/ldm/models/diffusion/dpm_solver/__init__.py new file: inpaint/model/anytext/ldm/models/diffusion/dpm_solver/dpm_solver.py new file: inpaint/model/anytext/ldm/models/diffusion/dpm_solver/sampler.py new file: inpaint/model/anytext/ldm/models/diffusion/plms.py new file: inpaint/model/anytext/ldm/models/diffusion/sampling_util.py new file: inpaint/model/anytext/ldm/modules/__init__.py new file: inpaint/model/anytext/ldm/modules/attention.py new file: inpaint/model/anytext/ldm/modules/diffusionmodules/__init__.py new file: 
inpaint/model/anytext/ldm/modules/diffusionmodules/model.py new file: inpaint/model/anytext/ldm/modules/diffusionmodules/openaimodel.py new file: inpaint/model/anytext/ldm/modules/diffusionmodules/upscaling.py new file: inpaint/model/anytext/ldm/modules/diffusionmodules/util.py new file: inpaint/model/anytext/ldm/modules/distributions/__init__.py new file: inpaint/model/anytext/ldm/modules/distributions/distributions.py new file: inpaint/model/anytext/ldm/modules/ema.py new file: inpaint/model/anytext/ldm/modules/encoders/__init__.py new file: inpaint/model/anytext/ldm/modules/encoders/modules.py new file: inpaint/model/anytext/ldm/util.py new file: inpaint/model/anytext/main.py new file: inpaint/model/anytext/ocr_recog/RNN.py new file: inpaint/model/anytext/ocr_recog/RecCTCHead.py new file: inpaint/model/anytext/ocr_recog/RecModel.py new file: inpaint/model/anytext/ocr_recog/RecMv1_enhance.py new file: inpaint/model/anytext/ocr_recog/RecSVTR.py new file: inpaint/model/anytext/ocr_recog/__init__.py new file: inpaint/model/anytext/ocr_recog/common.py new file: inpaint/model/anytext/ocr_recog/en_dict.txt new file: inpaint/model/anytext/ocr_recog/ppocr_keys_v1.txt new file: inpaint/model/anytext/utils.py new file: inpaint/model/base.py new file: inpaint/model/brushnet/__init__.py new file: inpaint/model/brushnet/brushnet.py new file: inpaint/model/brushnet/brushnet_unet_forward.py new file: inpaint/model/brushnet/brushnet_wrapper.py new file: inpaint/model/brushnet/pipeline_brushnet.py new file: inpaint/model/brushnet/unet_2d_blocks.py new file: inpaint/model/controlnet.py new file: inpaint/model/ddim_sampler.py new file: inpaint/model/fcf.py new file: inpaint/model/helper/__init__.py new file: inpaint/model/helper/controlnet_preprocess.py new file: inpaint/model/helper/cpu_text_encoder.py new file: inpaint/model/helper/g_diffuser_bot.py new file: inpaint/model/instruct_pix2pix.py new file: inpaint/model/kandinsky.py new file: inpaint/model/lama.py new file: 
inpaint/model/ldm.py new file: inpaint/model/manga.py new file: inpaint/model/mat.py new file: inpaint/model/mi_gan.py new file: inpaint/model/opencv2.py new file: inpaint/model/original_sd_configs/__init__.py new file: inpaint/model/original_sd_configs/sd_xl_base.yaml new file: inpaint/model/original_sd_configs/sd_xl_refiner.yaml new file: inpaint/model/original_sd_configs/v1-inference.yaml new file: inpaint/model/original_sd_configs/v2-inference-v.yaml new file: inpaint/model/paint_by_example.py new file: inpaint/model/plms_sampler.py new file: inpaint/model/power_paint/__init__.py new file: inpaint/model/power_paint/pipeline_powerpaint.py new file: inpaint/model/power_paint/power_paint.py new file: inpaint/model/power_paint/power_paint_v2.py new file: inpaint/model/power_paint/powerpaint_tokenizer.py 2024-08-20 21:17:33 +02:00			`import cv2`
			`import numpy as np`
			`import torch`
			`import torch.nn as nn`
			`import torch.nn.functional as F`
			`from PIL import Image`
			`from torchvision.models._utils import IntermediateLayerGetter as IntermediateLayerGetter`

			`from .align_trans import get_reference_facial_points, warp_and_crop_face`
			`from .retinaface_net import (`
			`FPN,`
			`SSH,`
			`MobileNetV1,`
			`make_bbox_head,`
			`make_class_head,`
			`make_landmark_head,`
			`)`
			`from .retinaface_utils import (`
			`PriorBox,`
			`batched_decode,`
			`batched_decode_landm,`
			`decode,`
			`decode_landm,`
			`py_cpu_nms,`
			`)`


			`def generate_config(network_name):`
			`cfg_mnet = {`
			`"name": "mobilenet0.25",`
			`"min_sizes": [[16, 32], [64, 128], [256, 512]],`
			`"steps": [8, 16, 32],`
			`"variance": [0.1, 0.2],`
			`"clip": False,`
			`"loc_weight": 2.0,`
			`"gpu_train": True,`
			`"batch_size": 32,`
			`"ngpu": 1,`
			`"epoch": 250,`
			`"decay1": 190,`
			`"decay2": 220,`
			`"image_size": 640,`
			`"return_layers": {"stage1": 1, "stage2": 2, "stage3": 3},`
			`"in_channel": 32,`
			`"out_channel": 64,`
			`}`

			`cfg_re50 = {`
			`"name": "Resnet50",`
			`"min_sizes": [[16, 32], [64, 128], [256, 512]],`
			`"steps": [8, 16, 32],`
			`"variance": [0.1, 0.2],`
			`"clip": False,`
			`"loc_weight": 2.0,`
			`"gpu_train": True,`
			`"batch_size": 24,`
			`"ngpu": 4,`
			`"epoch": 100,`
			`"decay1": 70,`
			`"decay2": 90,`
			`"image_size": 840,`
			`"return_layers": {"layer2": 1, "layer3": 2, "layer4": 3},`
			`"in_channel": 256,`
			`"out_channel": 256,`
			`}`

			`if network_name == "mobile0.25":`
			`return cfg_mnet`
			`elif network_name == "resnet50":`
			`return cfg_re50`
			`else:`
			`raise NotImplementedError(f"network_name={network_name}")`


			`class RetinaFace(nn.Module):`
			`def __init__(self, network_name="resnet50", half=False, phase="test", device=None):`
			`self.device = (`
			`torch.device("cuda" if torch.cuda.is_available() else "cpu")`
			`if device is None`
			`else device`
			`)`

			`super(RetinaFace, self).__init__()`
			`self.half_inference = half`
			`cfg = generate_config(network_name)`
			`self.backbone = cfg["name"]`

			`self.model_name = f"retinaface_{network_name}"`
			`self.cfg = cfg`
			`self.phase = phase`
			`self.target_size, self.max_size = 1600, 2150`
			`self.resize, self.scale, self.scale1 = 1.0, None, None`
			`self.mean_tensor = torch.tensor(`
			`[[[[104.0]], [[117.0]], [[123.0]]]], device=self.device`
			`)`
			`self.reference = get_reference_facial_points(default_square=True)`
			`# Build network.`
			`backbone = None`
			`if cfg["name"] == "mobilenet0.25":`
			`backbone = MobileNetV1()`
			`self.body = IntermediateLayerGetter(backbone, cfg["return_layers"])`
			`elif cfg["name"] == "Resnet50":`
			`import torchvision.models as models`

			`backbone = models.resnet50(pretrained=False)`
			`self.body = IntermediateLayerGetter(backbone, cfg["return_layers"])`

			`in_channels_stage2 = cfg["in_channel"]`
			`in_channels_list = [`
			`in_channels_stage2 * 2,`
			`in_channels_stage2 * 4,`
			`in_channels_stage2 * 8,`
			`]`

			`out_channels = cfg["out_channel"]`
			`self.fpn = FPN(in_channels_list, out_channels)`
			`self.ssh1 = SSH(out_channels, out_channels)`
			`self.ssh2 = SSH(out_channels, out_channels)`
			`self.ssh3 = SSH(out_channels, out_channels)`

			`self.ClassHead = make_class_head(fpn_num=3, inchannels=cfg["out_channel"])`
			`self.BboxHead = make_bbox_head(fpn_num=3, inchannels=cfg["out_channel"])`
			`self.LandmarkHead = make_landmark_head(fpn_num=3, inchannels=cfg["out_channel"])`

			`self.to(self.device)`
			`self.eval()`
			`if self.half_inference:`
			`self.half()`

			`def forward(self, inputs):`
			`out = self.body(inputs)`

			`if self.backbone == "mobilenet0.25" or self.backbone == "Resnet50":`
			`out = list(out.values())`
			`# FPN`
			`fpn = self.fpn(out)`

			`# SSH`
			`feature1 = self.ssh1(fpn[0])`
			`feature2 = self.ssh2(fpn[1])`
			`feature3 = self.ssh3(fpn[2])`
			`features = [feature1, feature2, feature3]`

			`bbox_regressions = torch.cat(`
			`[self.BboxHead[i](feature) for i, feature in enumerate(features)], dim=1`
			`)`
			`classifications = torch.cat(`
			`[self.ClassHead[i](feature) for i, feature in enumerate(features)], dim=1`
			`)`
			`tmp = [self.LandmarkHead[i](feature) for i, feature in enumerate(features)]`
			`ldm_regressions = torch.cat(tmp, dim=1)`

			`if self.phase == "train":`
			`output = (bbox_regressions, classifications, ldm_regressions)`
			`else:`
			`output = (`
			`bbox_regressions,`
			`F.softmax(classifications, dim=-1),`
			`ldm_regressions,`
			`)`
			`return output`

			`def __detect_faces(self, inputs):`
			`# get scale`
			`height, width = inputs.shape[2:]`
			`self.scale = torch.tensor(`
			`[width, height, width, height], dtype=torch.float32, device=self.device`
			`)`
			`tmp = [`
			`width,`
			`height,`
			`width,`
			`height,`
			`width,`
			`height,`
			`width,`
			`height,`
			`width,`
			`height,`
			`]`
			`self.scale1 = torch.tensor(tmp, dtype=torch.float32, device=self.device)`

			`# forawrd`
			`inputs = inputs.to(self.device)`
			`if self.half_inference:`
			`inputs = inputs.half()`
			`loc, conf, landmarks = self(inputs)`

			`# get priorbox`
			`priorbox = PriorBox(self.cfg, image_size=inputs.shape[2:])`
			`priors = priorbox.forward().to(self.device)`

			`return loc, conf, landmarks, priors`

			`# single image detection`
			`def transform(self, image, use_origin_size):`
			`# convert to opencv format`
			`if isinstance(image, Image.Image):`
			`image = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)`
			`image = image.astype(np.float32)`

			`# testing scale`
			`im_size_min = np.min(image.shape[0:2])`
			`im_size_max = np.max(image.shape[0:2])`
			`resize = float(self.target_size) / float(im_size_min)`

			`# prevent bigger axis from being more than max_size`
			`if np.round(resize * im_size_max) > self.max_size:`
			`resize = float(self.max_size) / float(im_size_max)`
			`resize = 1 if use_origin_size else resize`

			`# resize`
			`if resize != 1:`
			`image = cv2.resize(`
			`image, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR`
			`)`

			`# convert to torch.tensor format`
			`# image -= (104, 117, 123)`
			`image = image.transpose(2, 0, 1)`
			`image = torch.from_numpy(image).unsqueeze(0)`

			`return image, resize`

			`def detect_faces(`
			`self,`
			`image,`
			`conf_threshold=0.8,`
			`nms_threshold=0.4,`
			`use_origin_size=True,`
			`):`
			`image, self.resize = self.transform(image, use_origin_size)`
			`image = image.to(self.device)`
			`if self.half_inference:`
			`image = image.half()`
			`image = image - self.mean_tensor`

			`loc, conf, landmarks, priors = self.__detect_faces(image)`

			`boxes = decode(loc.data.squeeze(0), priors.data, self.cfg["variance"])`
			`boxes = boxes * self.scale / self.resize`
			`boxes = boxes.cpu().numpy()`

			`scores = conf.squeeze(0).data.cpu().numpy()[:, 1]`

			`landmarks = decode_landm(landmarks.squeeze(0), priors, self.cfg["variance"])`
			`landmarks = landmarks * self.scale1 / self.resize`
			`landmarks = landmarks.cpu().numpy()`

			`# ignore low scores`
			`inds = np.where(scores > conf_threshold)[0]`
			`boxes, landmarks, scores = boxes[inds], landmarks[inds], scores[inds]`

			`# sort`
			`order = scores.argsort()[::-1]`
			`boxes, landmarks, scores = boxes[order], landmarks[order], scores[order]`

			`# do NMS`
			`bounding_boxes = np.hstack((boxes, scores[:, np.newaxis])).astype(`
			`np.float32, copy=False`
			`)`
			`keep = py_cpu_nms(bounding_boxes, nms_threshold)`
			`bounding_boxes, landmarks = bounding_boxes[keep, :], landmarks[keep]`
			`# self.t['forward_pass'].toc()`
			`# print(self.t['forward_pass'].average_time)`
			`# import sys`
			`# sys.stdout.flush()`
			`return np.concatenate((bounding_boxes, landmarks), axis=1)`

			`def __align_multi(self, image, boxes, landmarks, limit=None):`
			`if len(boxes) < 1:`
			`return [], []`

			`if limit:`
			`boxes = boxes[:limit]`
			`landmarks = landmarks[:limit]`

			`faces = []`
			`for landmark in landmarks:`
			`facial5points = [[landmark[2 * j], landmark[2 * j + 1]] for j in range(5)]`

			`warped_face = warp_and_crop_face(`
			`np.array(image), facial5points, self.reference, crop_size=(112, 112)`
			`)`
			`faces.append(warped_face)`

			`return np.concatenate((boxes, landmarks), axis=1), faces`

			`def align_multi(self, img, conf_threshold=0.8, limit=None):`
			`rlt = self.detect_faces(img, conf_threshold=conf_threshold)`
			`boxes, landmarks = rlt[:, 0:5], rlt[:, 5:]`

			`return self.__align_multi(img, boxes, landmarks, limit)`

			`# batched detection`
			`def batched_transform(self, frames, use_origin_size):`
			`"""`
			`Arguments:`
			`frames: a list of PIL.Image, or torch.Tensor(shape=[n, h, w, c],`
			`type=np.float32, BGR format).`
			`use_origin_size: whether to use origin size.`
			`"""`
			`from_PIL = True if isinstance(frames[0], Image.Image) else False`

			`# convert to opencv format`
			`if from_PIL:`
			`frames = [`
			`cv2.cvtColor(np.asarray(frame), cv2.COLOR_RGB2BGR) for frame in frames`
			`]`
			`frames = np.asarray(frames, dtype=np.float32)`

			`# testing scale`
			`im_size_min = np.min(frames[0].shape[0:2])`
			`im_size_max = np.max(frames[0].shape[0:2])`
			`resize = float(self.target_size) / float(im_size_min)`

			`# prevent bigger axis from being more than max_size`
			`if np.round(resize * im_size_max) > self.max_size:`
			`resize = float(self.max_size) / float(im_size_max)`
			`resize = 1 if use_origin_size else resize`

			`# resize`
			`if resize != 1:`
			`if not from_PIL:`
			`frames = F.interpolate(frames, scale_factor=resize)`
			`else:`
			`frames = [`
			`cv2.resize(`
			`frame,`
			`None,`
			`None,`
			`fx=resize,`
			`fy=resize,`
			`interpolation=cv2.INTER_LINEAR,`
			`)`
			`for frame in frames`
			`]`

			`# convert to torch.tensor format`
			`if not from_PIL:`
			`frames = frames.transpose(1, 2).transpose(1, 3).contiguous()`
			`else:`
			`frames = frames.transpose((0, 3, 1, 2))`
			`frames = torch.from_numpy(frames)`

			`return frames, resize`

			`def batched_detect_faces(`
			`self, frames, conf_threshold=0.8, nms_threshold=0.4, use_origin_size=True`
			`):`
			`"""`
			`Arguments:`
			`frames: a list of PIL.Image, or np.array(shape=[n, h, w, c],`
			`type=np.uint8, BGR format).`
			`conf_threshold: confidence threshold.`
			`nms_threshold: nms threshold.`
			`use_origin_size: whether to use origin size.`
			`Returns:`
			`final_bounding_boxes: list of np.array ([n_boxes, 5],`
			`type=np.float32).`
			`final_landmarks: list of np.array ([n_boxes, 10], type=np.float32).`
			`"""`
			`# self.t['forward_pass'].tic()`
			`frames, self.resize = self.batched_transform(frames, use_origin_size)`
			`frames = frames.to(self.device)`
			`frames = frames - self.mean_tensor`

			`b_loc, b_conf, b_landmarks, priors = self.__detect_faces(frames)`

			`final_bounding_boxes, final_landmarks = [], []`

			`# decode`
			`priors = priors.unsqueeze(0)`
			`b_loc = (`
			`batched_decode(b_loc, priors, self.cfg["variance"])`
			`* self.scale`
			`/ self.resize`
			`)`
			`b_landmarks = (`
			`batched_decode_landm(b_landmarks, priors, self.cfg["variance"])`
			`* self.scale1`
			`/ self.resize`
			`)`
			`b_conf = b_conf[:, :, 1]`

			`# index for selection`
			`b_indice = b_conf > conf_threshold`

			`# concat`
			`b_loc_and_conf = torch.cat((b_loc, b_conf.unsqueeze(-1)), dim=2).float()`

			`for pred, landm, inds in zip(b_loc_and_conf, b_landmarks, b_indice):`
			`# ignore low scores`
			`pred, landm = pred[inds, :], landm[inds, :]`
			`if pred.shape[0] == 0:`
			`final_bounding_boxes.append(np.array([], dtype=np.float32))`
			`final_landmarks.append(np.array([], dtype=np.float32))`
			`continue`

			`# sort`
			`# order = score.argsort(descending=True)`
			`# box, landm, score = box[order], landm[order], score[order]`

			`# to CPU`
			`bounding_boxes, landm = pred.cpu().numpy(), landm.cpu().numpy()`

			`# NMS`
			`keep = py_cpu_nms(bounding_boxes, nms_threshold)`
			`bounding_boxes, landmarks = bounding_boxes[keep, :], landm[keep]`

			`# append`
			`final_bounding_boxes.append(bounding_boxes)`
			`final_landmarks.append(landmarks)`
			`# self.t['forward_pass'].toc(average=True)`
			`# self.batch_time += self.t['forward_pass'].diff`
			`# self.total_frame += len(frames)`
			`# print(self.batch_time / self.total_frame)`

			`return final_bounding_boxes, final_landmarks`