70af4845af
new file: inpaint/__main__.py new file: inpaint/api.py new file: inpaint/batch_processing.py new file: inpaint/benchmark.py new file: inpaint/cli.py new file: inpaint/const.py new file: inpaint/download.py new file: inpaint/file_manager/__init__.py new file: inpaint/file_manager/file_manager.py new file: inpaint/file_manager/storage_backends.py new file: inpaint/file_manager/utils.py new file: inpaint/helper.py new file: inpaint/installer.py new file: inpaint/model/__init__.py new file: inpaint/model/anytext/__init__.py new file: inpaint/model/anytext/anytext_model.py new file: inpaint/model/anytext/anytext_pipeline.py new file: inpaint/model/anytext/anytext_sd15.yaml new file: inpaint/model/anytext/cldm/__init__.py new file: inpaint/model/anytext/cldm/cldm.py new file: inpaint/model/anytext/cldm/ddim_hacked.py new file: inpaint/model/anytext/cldm/embedding_manager.py new file: inpaint/model/anytext/cldm/hack.py new file: inpaint/model/anytext/cldm/model.py new file: inpaint/model/anytext/cldm/recognizer.py new file: inpaint/model/anytext/ldm/__init__.py new file: inpaint/model/anytext/ldm/models/__init__.py new file: inpaint/model/anytext/ldm/models/autoencoder.py new file: inpaint/model/anytext/ldm/models/diffusion/__init__.py new file: inpaint/model/anytext/ldm/models/diffusion/ddim.py new file: inpaint/model/anytext/ldm/models/diffusion/ddpm.py new file: inpaint/model/anytext/ldm/models/diffusion/dpm_solver/__init__.py new file: inpaint/model/anytext/ldm/models/diffusion/dpm_solver/dpm_solver.py new file: inpaint/model/anytext/ldm/models/diffusion/dpm_solver/sampler.py new file: inpaint/model/anytext/ldm/models/diffusion/plms.py new file: inpaint/model/anytext/ldm/models/diffusion/sampling_util.py new file: inpaint/model/anytext/ldm/modules/__init__.py new file: inpaint/model/anytext/ldm/modules/attention.py new file: inpaint/model/anytext/ldm/modules/diffusionmodules/__init__.py new file: inpaint/model/anytext/ldm/modules/diffusionmodules/model.py new file: 
inpaint/model/anytext/ldm/modules/diffusionmodules/openaimodel.py new file: inpaint/model/anytext/ldm/modules/diffusionmodules/upscaling.py new file: inpaint/model/anytext/ldm/modules/diffusionmodules/util.py new file: inpaint/model/anytext/ldm/modules/distributions/__init__.py new file: inpaint/model/anytext/ldm/modules/distributions/distributions.py new file: inpaint/model/anytext/ldm/modules/ema.py new file: inpaint/model/anytext/ldm/modules/encoders/__init__.py new file: inpaint/model/anytext/ldm/modules/encoders/modules.py new file: inpaint/model/anytext/ldm/util.py new file: inpaint/model/anytext/main.py new file: inpaint/model/anytext/ocr_recog/RNN.py new file: inpaint/model/anytext/ocr_recog/RecCTCHead.py new file: inpaint/model/anytext/ocr_recog/RecModel.py new file: inpaint/model/anytext/ocr_recog/RecMv1_enhance.py new file: inpaint/model/anytext/ocr_recog/RecSVTR.py new file: inpaint/model/anytext/ocr_recog/__init__.py new file: inpaint/model/anytext/ocr_recog/common.py new file: inpaint/model/anytext/ocr_recog/en_dict.txt new file: inpaint/model/anytext/ocr_recog/ppocr_keys_v1.txt new file: inpaint/model/anytext/utils.py new file: inpaint/model/base.py new file: inpaint/model/brushnet/__init__.py new file: inpaint/model/brushnet/brushnet.py new file: inpaint/model/brushnet/brushnet_unet_forward.py new file: inpaint/model/brushnet/brushnet_wrapper.py new file: inpaint/model/brushnet/pipeline_brushnet.py new file: inpaint/model/brushnet/unet_2d_blocks.py new file: inpaint/model/controlnet.py new file: inpaint/model/ddim_sampler.py new file: inpaint/model/fcf.py new file: inpaint/model/helper/__init__.py new file: inpaint/model/helper/controlnet_preprocess.py new file: inpaint/model/helper/cpu_text_encoder.py new file: inpaint/model/helper/g_diffuser_bot.py new file: inpaint/model/instruct_pix2pix.py new file: inpaint/model/kandinsky.py new file: inpaint/model/lama.py new file: inpaint/model/ldm.py new file: inpaint/model/manga.py new file: 
inpaint/model/mat.py new file: inpaint/model/mi_gan.py new file: inpaint/model/opencv2.py new file: inpaint/model/original_sd_configs/__init__.py new file: inpaint/model/original_sd_configs/sd_xl_base.yaml new file: inpaint/model/original_sd_configs/sd_xl_refiner.yaml new file: inpaint/model/original_sd_configs/v1-inference.yaml new file: inpaint/model/original_sd_configs/v2-inference-v.yaml new file: inpaint/model/paint_by_example.py new file: inpaint/model/plms_sampler.py new file: inpaint/model/power_paint/__init__.py new file: inpaint/model/power_paint/pipeline_powerpaint.py new file: inpaint/model/power_paint/power_paint.py new file: inpaint/model/power_paint/power_paint_v2.py new file: inpaint/model/power_paint/powerpaint_tokenizer.py
134 lines
4.6 KiB
Python
134 lines
4.6 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# All rights reserved.
|
|
|
|
# This source code is licensed under the license found in the
|
|
# LICENSE file in the root directory of this source tree.
|
|
|
|
from typing import List, Optional
|
|
|
|
import torch
|
|
import torch.nn as nn
|
|
import torch.nn.functional as F
|
|
|
|
|
|
class ImageEncoder(nn.Module):
    """Chain a backbone ``trunk`` and a ``neck`` into a single image encoder.

    The trunk output is passed straight to the neck, which is expected to
    return a ``(features, positional_encodings)`` pair of sequences.
    """

    def __init__(
        self,
        trunk: nn.Module,
        neck: nn.Module,
        scalp: int = 0,
    ):
        """Store the sub-modules and check that their channel lists agree.

        :param trunk: backbone module; must expose ``channel_list``
        :param neck: neck module; must expose ``backbone_channel_list``
        :param scalp: number of trailing levels to drop from the neck output
            in ``forward`` (nothing is dropped when it is not positive)
        """
        super().__init__()
        self.trunk = trunk
        self.neck = neck
        self.scalp = scalp
        # The neck builds its lateral convs from the trunk's channel layout,
        # so the two lists have to be identical.
        assert (
            self.trunk.channel_list == self.neck.backbone_channel_list
        ), f"Channel dims of trunk and neck do not match. Trunk: {self.trunk.channel_list}, neck: {self.neck.backbone_channel_list}"

    def forward(self, sample: torch.Tensor):
        """Encode ``sample`` and package the neck outputs in a dict.

        :param sample: input handed to the trunk
        :return: dict with ``"vision_features"`` (last kept feature level),
            ``"vision_pos_enc"`` (kept positional encodings) and
            ``"backbone_fpn"`` (kept feature levels)
        """
        pyramid, encodings = self.neck(self.trunk(sample))

        drop = self.scalp
        if drop > 0:
            # Discard the lowest resolution features (the trailing entries).
            pyramid = pyramid[:-drop]
            encodings = encodings[:-drop]

        return {
            "vision_features": pyramid[-1],
            "vision_pos_enc": encodings,
            "backbone_fpn": pyramid,
        }
|
|
|
|
|
|
class FpnNeck(nn.Module):
    """
    A modified variant of Feature Pyramid Network (FPN) neck
    (we remove output conv and upsample the top-down features with a
    configurable interpolation mode, similar to ViT pos embed interpolation)
    """

    # Modes for which F.interpolate rejects an explicit ``align_corners``
    # value (it raises ValueError unless ``align_corners`` is None).
    _NO_ALIGN_CORNERS_MODES = ("nearest", "nearest-exact", "area")

    def __init__(
        self,
        position_encoding: nn.Module,
        d_model: int,
        backbone_channel_list: List[int],
        kernel_size: int = 1,
        stride: int = 1,
        padding: int = 0,
        fpn_interp_model: str = "bilinear",
        fuse_type: str = "sum",
        fpn_top_down_levels: Optional[List[int]] = None,
    ):
        """Initialize the neck

        :param position_encoding: module applied to every output level to
            produce its positional encoding
        :param d_model: number of output channels of every lateral conv
        :param backbone_channel_list: input channels of the lateral convs,
            one conv per entry; ``forward`` feeds ``xs[i]`` to conv
            ``len - 1 - i``, i.e. the list is in reverse order w.r.t. ``xs``
        :param kernel_size: kernel size of the lateral convs
        :param stride: stride of the lateral convs
        :param padding: padding of the lateral convs
        :param fpn_interp_model: ``mode`` passed to ``F.interpolate`` when
            upsampling the top-down features
        :param fuse_type: ``"sum"`` adds lateral and top-down features,
            ``"avg"`` additionally halves the sum
        :param fpn_top_down_levels: levels that receive top-down features;
            ``None`` means all levels
        """
        super().__init__()
        self.position_encoding = position_encoding
        self.convs = nn.ModuleList()
        self.backbone_channel_list = backbone_channel_list
        for dim in backbone_channel_list:
            # Wrapped in a Sequential with the sub-module named "conv" so the
            # parameter names stay ``convs.<i>.conv.*``.
            current = nn.Sequential()
            current.add_module(
                "conv",
                nn.Conv2d(
                    in_channels=dim,
                    out_channels=d_model,
                    kernel_size=kernel_size,
                    stride=stride,
                    padding=padding,
                ),
            )

            self.convs.append(current)
        self.fpn_interp_model = fpn_interp_model
        assert fuse_type in ["sum", "avg"]
        self.fuse_type = fuse_type

        # levels to have top-down features in its outputs
        # e.g. if fpn_top_down_levels is [2, 3], then only outputs of level 2 and 3
        # have top-down propagation, while outputs of level 0 and level 1 have only
        # lateral features from the same backbone level.
        if fpn_top_down_levels is None:
            # default is to have top-down features on all levels
            fpn_top_down_levels = range(len(self.convs))
        self.fpn_top_down_levels = list(fpn_top_down_levels)

    def forward(self, xs: List[torch.Tensor]):
        """Run the FPN over the backbone feature maps.

        :param xs: one feature map per lateral conv; processed from the last
            entry to the first, each top-down step upsampling by a fixed
            factor of 2 (so consecutive levels are presumably 2x apart in
            spatial size -- confirm against the trunk)
        :return: ``(out, pos)`` -- two lists aligned with ``xs`` holding the
            fused features and their positional encodings (cast to the
            feature dtype)
        """
        out = [None] * len(self.convs)
        pos = [None] * len(self.convs)
        assert len(xs) == len(self.convs)

        # F.interpolate only accepts ``align_corners`` for the linear/cubic
        # family; every other mode must get None or it raises ValueError.
        # Loop-invariant, so decided once up front.
        if self.fpn_interp_model in self._NO_ALIGN_CORNERS_MODES:
            align_corners = None
        else:
            align_corners = False

        # fpn forward pass
        # see https://github.com/facebookresearch/detectron2/blob/main/detectron2/modeling/backbone/fpn.py
        prev_features = None
        # forward in top-down order (from low to high resolution)
        n = len(self.convs) - 1
        for i in range(n, -1, -1):
            x = xs[i]
            # convs are stored in reverse order relative to xs
            lateral_features = self.convs[n - i](x)
            if i in self.fpn_top_down_levels and prev_features is not None:
                # Upsampling is done in float32 regardless of the input dtype.
                top_down_features = F.interpolate(
                    prev_features.to(dtype=torch.float32),
                    scale_factor=2.0,
                    mode=self.fpn_interp_model,
                    align_corners=align_corners,
                    antialias=False,
                )
                prev_features = lateral_features + top_down_features
                if self.fuse_type == "avg":
                    prev_features /= 2
            else:
                prev_features = lateral_features
            x_out = prev_features
            out[i] = x_out
            pos[i] = self.position_encoding(x_out).to(x_out.dtype)

        return out, pos
|