IOPaint/inpaint/plugins/segment_anything2/modeling/sam2_utils.py
root 70af4845af new file: inpaint/__init__.py
new file:   inpaint/__main__.py
	new file:   inpaint/api.py
	new file:   inpaint/batch_processing.py
	new file:   inpaint/benchmark.py
	new file:   inpaint/cli.py
	new file:   inpaint/const.py
	new file:   inpaint/download.py
	new file:   inpaint/file_manager/__init__.py
	new file:   inpaint/file_manager/file_manager.py
	new file:   inpaint/file_manager/storage_backends.py
	new file:   inpaint/file_manager/utils.py
	new file:   inpaint/helper.py
	new file:   inpaint/installer.py
	new file:   inpaint/model/__init__.py
	new file:   inpaint/model/anytext/__init__.py
	new file:   inpaint/model/anytext/anytext_model.py
	new file:   inpaint/model/anytext/anytext_pipeline.py
	new file:   inpaint/model/anytext/anytext_sd15.yaml
	new file:   inpaint/model/anytext/cldm/__init__.py
	new file:   inpaint/model/anytext/cldm/cldm.py
	new file:   inpaint/model/anytext/cldm/ddim_hacked.py
	new file:   inpaint/model/anytext/cldm/embedding_manager.py
	new file:   inpaint/model/anytext/cldm/hack.py
	new file:   inpaint/model/anytext/cldm/model.py
	new file:   inpaint/model/anytext/cldm/recognizer.py
	new file:   inpaint/model/anytext/ldm/__init__.py
	new file:   inpaint/model/anytext/ldm/models/__init__.py
	new file:   inpaint/model/anytext/ldm/models/autoencoder.py
	new file:   inpaint/model/anytext/ldm/models/diffusion/__init__.py
	new file:   inpaint/model/anytext/ldm/models/diffusion/ddim.py
	new file:   inpaint/model/anytext/ldm/models/diffusion/ddpm.py
	new file:   inpaint/model/anytext/ldm/models/diffusion/dpm_solver/__init__.py
	new file:   inpaint/model/anytext/ldm/models/diffusion/dpm_solver/dpm_solver.py
	new file:   inpaint/model/anytext/ldm/models/diffusion/dpm_solver/sampler.py
	new file:   inpaint/model/anytext/ldm/models/diffusion/plms.py
	new file:   inpaint/model/anytext/ldm/models/diffusion/sampling_util.py
	new file:   inpaint/model/anytext/ldm/modules/__init__.py
	new file:   inpaint/model/anytext/ldm/modules/attention.py
	new file:   inpaint/model/anytext/ldm/modules/diffusionmodules/__init__.py
	new file:   inpaint/model/anytext/ldm/modules/diffusionmodules/model.py
	new file:   inpaint/model/anytext/ldm/modules/diffusionmodules/openaimodel.py
	new file:   inpaint/model/anytext/ldm/modules/diffusionmodules/upscaling.py
	new file:   inpaint/model/anytext/ldm/modules/diffusionmodules/util.py
	new file:   inpaint/model/anytext/ldm/modules/distributions/__init__.py
	new file:   inpaint/model/anytext/ldm/modules/distributions/distributions.py
	new file:   inpaint/model/anytext/ldm/modules/ema.py
	new file:   inpaint/model/anytext/ldm/modules/encoders/__init__.py
	new file:   inpaint/model/anytext/ldm/modules/encoders/modules.py
	new file:   inpaint/model/anytext/ldm/util.py
	new file:   inpaint/model/anytext/main.py
	new file:   inpaint/model/anytext/ocr_recog/RNN.py
	new file:   inpaint/model/anytext/ocr_recog/RecCTCHead.py
	new file:   inpaint/model/anytext/ocr_recog/RecModel.py
	new file:   inpaint/model/anytext/ocr_recog/RecMv1_enhance.py
	new file:   inpaint/model/anytext/ocr_recog/RecSVTR.py
	new file:   inpaint/model/anytext/ocr_recog/__init__.py
	new file:   inpaint/model/anytext/ocr_recog/common.py
	new file:   inpaint/model/anytext/ocr_recog/en_dict.txt
	new file:   inpaint/model/anytext/ocr_recog/ppocr_keys_v1.txt
	new file:   inpaint/model/anytext/utils.py
	new file:   inpaint/model/base.py
	new file:   inpaint/model/brushnet/__init__.py
	new file:   inpaint/model/brushnet/brushnet.py
	new file:   inpaint/model/brushnet/brushnet_unet_forward.py
	new file:   inpaint/model/brushnet/brushnet_wrapper.py
	new file:   inpaint/model/brushnet/pipeline_brushnet.py
	new file:   inpaint/model/brushnet/unet_2d_blocks.py
	new file:   inpaint/model/controlnet.py
	new file:   inpaint/model/ddim_sampler.py
	new file:   inpaint/model/fcf.py
	new file:   inpaint/model/helper/__init__.py
	new file:   inpaint/model/helper/controlnet_preprocess.py
	new file:   inpaint/model/helper/cpu_text_encoder.py
	new file:   inpaint/model/helper/g_diffuser_bot.py
	new file:   inpaint/model/instruct_pix2pix.py
	new file:   inpaint/model/kandinsky.py
	new file:   inpaint/model/lama.py
	new file:   inpaint/model/ldm.py
	new file:   inpaint/model/manga.py
	new file:   inpaint/model/mat.py
	new file:   inpaint/model/mi_gan.py
	new file:   inpaint/model/opencv2.py
	new file:   inpaint/model/original_sd_configs/__init__.py
	new file:   inpaint/model/original_sd_configs/sd_xl_base.yaml
	new file:   inpaint/model/original_sd_configs/sd_xl_refiner.yaml
	new file:   inpaint/model/original_sd_configs/v1-inference.yaml
	new file:   inpaint/model/original_sd_configs/v2-inference-v.yaml
	new file:   inpaint/model/paint_by_example.py
	new file:   inpaint/model/plms_sampler.py
	new file:   inpaint/model/power_paint/__init__.py
	new file:   inpaint/model/power_paint/pipeline_powerpaint.py
	new file:   inpaint/model/power_paint/power_paint.py
	new file:   inpaint/model/power_paint/power_paint_v2.py
	new file:   inpaint/model/power_paint/powerpaint_tokenizer.py
2024-08-20 21:17:33 +02:00

150 lines
5.6 KiB
Python

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F
def select_closest_cond_frames(frame_idx, cond_frame_outputs, max_cond_frame_num):
"""
Select up to `max_cond_frame_num` conditioning frames from `cond_frame_outputs`
that are temporally closest to the current frame at `frame_idx`. Here, we take
- a) the closest conditioning frame before `frame_idx` (if any);
- b) the closest conditioning frame after `frame_idx` (if any);
- c) any other temporally closest conditioning frames until reaching a total
of `max_cond_frame_num` conditioning frames.
Outputs:
- selected_outputs: selected items (keys & values) from `cond_frame_outputs`.
- unselected_outputs: items (keys & values) not selected in `cond_frame_outputs`.
"""
if max_cond_frame_num == -1 or len(cond_frame_outputs) <= max_cond_frame_num:
selected_outputs = cond_frame_outputs
unselected_outputs = {}
else:
assert max_cond_frame_num >= 2, "we should allow using 2+ conditioning frames"
selected_outputs = {}
# the closest conditioning frame before `frame_idx` (if any)
idx_before = max((t for t in cond_frame_outputs if t < frame_idx), default=None)
if idx_before is not None:
selected_outputs[idx_before] = cond_frame_outputs[idx_before]
# the closest conditioning frame after `frame_idx` (if any)
idx_after = min((t for t in cond_frame_outputs if t >= frame_idx), default=None)
if idx_after is not None:
selected_outputs[idx_after] = cond_frame_outputs[idx_after]
# add other temporally closest conditioning frames until reaching a total
# of `max_cond_frame_num` conditioning frames.
num_remain = max_cond_frame_num - len(selected_outputs)
inds_remain = sorted(
(t for t in cond_frame_outputs if t not in selected_outputs),
key=lambda x: abs(x - frame_idx),
)[:num_remain]
selected_outputs.update((t, cond_frame_outputs[t]) for t in inds_remain)
unselected_outputs = {
t: v for t, v in cond_frame_outputs.items() if t not in selected_outputs
}
return selected_outputs, unselected_outputs
def get_1d_sine_pe(pos_inds, dim, temperature=10000):
"""
Get 1D sine positional embedding as in the original Transformer paper.
"""
pe_dim = dim // 2
dim_t = torch.arange(pe_dim, dtype=torch.float32, device=pos_inds.device)
dim_t = temperature ** (2 * (dim_t // 2) / pe_dim)
pos_embed = pos_inds.unsqueeze(-1) / dim_t
pos_embed = torch.cat([pos_embed.sin(), pos_embed.cos()], dim=-1)
return pos_embed
def get_activation_fn(activation):
"""Return an activation function given a string"""
if activation == "relu":
return F.relu
if activation == "gelu":
return F.gelu
if activation == "glu":
return F.glu
raise RuntimeError(f"activation should be relu/gelu, not {activation}.")
def get_clones(module, N):
return nn.ModuleList([copy.deepcopy(module) for i in range(N)])
class DropPath(nn.Module):
# adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/layers/drop.py
def __init__(self, drop_prob=0.0, scale_by_keep=True):
super(DropPath, self).__init__()
self.drop_prob = drop_prob
self.scale_by_keep = scale_by_keep
def forward(self, x):
if self.drop_prob == 0.0 or not self.training:
return x
keep_prob = 1 - self.drop_prob
shape = (x.shape[0],) + (1,) * (x.ndim - 1)
random_tensor = x.new_empty(shape).bernoulli_(keep_prob)
if keep_prob > 0.0 and self.scale_by_keep:
random_tensor.div_(keep_prob)
return x * random_tensor
# Lightly adapted from
# https://github.com/facebookresearch/MaskFormer/blob/main/mask_former/modeling/transformer/transformer_predictor.py # noqa
class MLP(nn.Module):
def __init__(
self,
input_dim: int,
hidden_dim: int,
output_dim: int,
num_layers: int,
activation: nn.Module = nn.ReLU,
sigmoid_output: bool = False,
) -> None:
super().__init__()
self.num_layers = num_layers
h = [hidden_dim] * (num_layers - 1)
self.layers = nn.ModuleList(
nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim])
)
self.sigmoid_output = sigmoid_output
self.act = activation()
def forward(self, x):
for i, layer in enumerate(self.layers):
x = self.act(layer(x)) if i < self.num_layers - 1 else layer(x)
if self.sigmoid_output:
x = F.sigmoid(x)
return x
# From https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py # noqa
# Itself from https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119 # noqa
class LayerNorm2d(nn.Module):
def __init__(self, num_channels: int, eps: float = 1e-6) -> None:
super().__init__()
self.weight = nn.Parameter(torch.ones(num_channels))
self.bias = nn.Parameter(torch.zeros(num_channels))
self.eps = eps
def forward(self, x: torch.Tensor) -> torch.Tensor:
u = x.mean(1, keepdim=True)
s = (x - u).pow(2).mean(1, keepdim=True)
x = (x - u) / torch.sqrt(s + self.eps)
x = self.weight[:, None, None] * x + self.bias[:, None, None]
return x