import copy
from typing import Optional

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor

from easycv.framework.errors import RuntimeError


class MLP(nn.Module):
    """Very simple multi-layer perceptron (also called FFN)."""

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super().__init__()
        self.num_layers = num_layers
        h = [hidden_dim] * (num_layers - 1)
        self.layers = nn.ModuleList(
            nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))

    def forward(self, x):
        for i, layer in enumerate(self.layers):
            x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
        return x
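
# Illustrative usage sketch (not part of the original file; the shapes and
# hyper-parameters below are assumptions, e.g. a DETR-style box head):
#   mlp = MLP(input_dim=256, hidden_dim=256, output_dim=4, num_layers=3)
#   boxes = mlp(torch.randn(2, 100, 256))  # -> torch.Size([2, 100, 4])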


class Mlp(nn.Module):
    """Multilayer perceptron.

    Parameters:
        in_features: Size of each input feature.
        hidden_features: Hidden layer size; defaults to ``in_features``.
        out_features: Output size; defaults to ``in_features``.
        act_layer: Activation layer class; defaults to ``nn.GELU``.
        drop: Dropout probability applied after each linear layer.
    """

    def __init__(self,
                 in_features,
                 hidden_features=None,
                 out_features=None,
                 act_layer=nn.GELU,
                 drop=0.):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x
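
# Illustrative usage sketch (values are assumptions, not from the original file);
# the input shape is preserved by the two linear layers:
#   ffn = Mlp(in_features=768, hidden_features=3072, act_layer=nn.GELU, drop=0.1)
#   tokens = ffn(torch.randn(2, 197, 768))  # -> torch.Size([2, 197, 768])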


def drop_path(x, drop_prob: float = 0., training: bool = False):
    """Drop paths (Stochastic Depth) per sample.

    Each sample in the batch is kept with probability ``1 - drop_prob`` and the
    kept samples are rescaled by ``1 / (1 - drop_prob)`` so the expected value
    of the output matches the input. A no-op at evaluation time or when
    ``drop_prob`` is 0.
    """
    if drop_prob == 0. or not training:
        return x
    keep_prob = 1 - drop_prob
    # work with tensors of any rank, not just 2D ConvNet feature maps
    shape = (x.shape[0], ) + (1, ) * (x.ndim - 1)
    random_tensor = keep_prob + torch.rand(
        shape, dtype=x.dtype, device=x.device)
    random_tensor.floor_()  # binarize
    output = x.div(keep_prob) * random_tensor
    return output
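
# Quick sanity sketch (assumed values): with drop_prob=0.2 in training mode,
# roughly 20% of the samples are zeroed and the rest are scaled by 1 / 0.8,
# so the output's expectation equals the input.
#   y = drop_path(torch.ones(8, 4), drop_prob=0.2, training=True)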


class DropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in the main path
    of residual blocks)."""

    def __init__(self, drop_prob=None):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        return drop_path(x, self.drop_prob, self.training)

    def extra_repr(self):
        return 'p={}'.format(self.drop_prob)
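
# Illustrative usage sketch (shapes and drop probability are assumptions):
#   layer = DropPath(drop_prob=0.1)
#   layer.train()
#   out = layer(torch.randn(4, 197, 768))  # ~10% of samples zeroed per forward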


class TransformerEncoder(nn.Module):
    """Stack of ``num_layers`` clones of ``encoder_layer``.

    When ``query_scale_type == 'cond_elewise'``, the positional embedding fed
    to each layer is rescaled element-wise by an MLP conditioned on the current
    layer output; otherwise it is passed through unchanged.
    """

    def __init__(self,
                 encoder_layer,
                 num_layers,
                 norm=None,
                 d_model=256,
                 query_scale_type=None):
        super().__init__()
        self.layers = _get_clones(encoder_layer, num_layers)
        self.num_layers = num_layers
        self.query_scale_type = query_scale_type
        if query_scale_type == 'cond_elewise':
            self.query_scale = MLP(d_model, d_model, d_model, 2)
        self.norm = norm

    def forward(self,
                src,
                mask: Optional[Tensor] = None,
                src_key_padding_mask: Optional[Tensor] = None,
                pos: Optional[Tensor] = None):
        output = src

        for layer_id, layer in enumerate(self.layers):
            # rescale the content and pos sim
            if self.query_scale_type == 'cond_elewise':
                pos_scales = self.query_scale(output)
            else:
                pos_scales = 1
            output = layer(
                output,
                src_mask=mask,
                src_key_padding_mask=src_key_padding_mask,
                pos=pos * pos_scales)

        if self.norm is not None:
            output = self.norm(output)

        return output


class TransformerEncoderLayer(nn.Module):
    """Post-norm transformer encoder layer.

    Self-attention with the positional embedding added to queries and keys,
    followed by a feedforward block; each sub-layer is wrapped with dropout,
    a residual connection and LayerNorm.
    """

    def __init__(self,
                 d_model,
                 nhead,
                 dim_feedforward=2048,
                 dropout=0.1,
                 activation='relu',
                 normalize_before=False):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
        # Implementation of the feedforward model
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.activation = _get_activation_fn(activation)
        # stored but unused below: forward() implements only the post-norm path
        self.normalize_before = normalize_before

    def with_pos_embed(self, tensor, pos: Optional[Tensor]):
        return tensor if pos is None else tensor + pos

    def forward(self,
                src,
                src_mask: Optional[Tensor] = None,
                src_key_padding_mask: Optional[Tensor] = None,
                pos: Optional[Tensor] = None):
        q = k = self.with_pos_embed(src, pos)
        src2 = self.self_attn(
            q,
            k,
            value=src,
            attn_mask=src_mask,
            key_padding_mask=src_key_padding_mask)[0]
        src = src + self.dropout1(src2)
        src = self.norm1(src)
        src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
        src = src + self.dropout2(src2)
        src = self.norm2(src)
        return src
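
# Illustrative usage sketch (shapes and hyper-parameters are assumptions):
#   layer = TransformerEncoderLayer(d_model=256, nhead=8)
#   encoder = TransformerEncoder(layer, num_layers=6, d_model=256)
#   src = torch.randn(100, 2, 256)  # (sequence, batch, d_model) for nn.MultiheadAttention
#   pos = torch.randn(100, 2, 256)
#   memory = encoder(src, pos=pos)  # -> same shape as src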


def _get_clones(module, N, layer_share=False):
    if layer_share:
        # share the same module instance (and its parameters) across layers
        return nn.ModuleList([module for i in range(N)])
    else:
        # independent deep copies, one per layer
        return nn.ModuleList([copy.deepcopy(module) for i in range(N)])


def _get_activation_fn(activation):
    """Return an activation function given a string"""
    if activation == 'relu':
        return F.relu
    if activation == 'gelu':
        return F.gelu
    if activation == 'glu':
        return F.glu
    if activation == 'prelu':
        return nn.PReLU()
    if activation == 'selu':
        return F.selu
    raise RuntimeError(
        f'activation should be relu/gelu/glu/prelu/selu, not {activation}.')