mirror of https://github.com/YifanXu74/MQ-Det.git
53 lines
1.9 KiB
Python
import torch
import torch.nn.functional as F
from torch import nn, Tensor

import copy
from typing import Optional, List


def _get_clones(module, N):
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])


def _get_activation_fn(activation):
    """Return an activation function given a string"""
    if activation == "relu":
        return F.relu
    if activation == "gelu":
        return F.gelu
    if activation == "glu":
        return F.glu
    raise RuntimeError(f"activation should be relu/gelu/glu, not {activation}.")


class TransformerEncoderLayer(nn.Module):

    def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1,
                 activation="relu", normalize_before=False):
        super(TransformerEncoderLayer, self).__init__()
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
        # Implementation of Feedforward model
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.activation = _get_activation_fn(activation)
        # Note: normalize_before is stored but not consulted here; forward()
        # below always applies post-norm (LayerNorm after each residual).
        self.normalize_before = normalize_before

    def forward(self, src,
                src_mask: Optional[Tensor] = None,
                src_key_padding_mask: Optional[Tensor] = None):
        # Self-attention block with residual connection and post-norm.
        src2 = self.self_attn(src, src, src, attn_mask=src_mask,
                              key_padding_mask=src_key_padding_mask)[0]
        src = src + self.dropout1(src2)
        src = self.norm1(src)
        # Position-wise feedforward block with residual connection and post-norm.
        src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
        src = src + self.dropout2(src2)
        src = self.norm2(src)
        return src
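

if __name__ == "__main__":
    # Usage sketch, not part of the mirrored file: exercises _get_clones and
    # TransformerEncoderLayer with assumed hyperparameters (d_model=256, nhead=8).
    # nn.MultiheadAttention defaults to (seq_len, batch, d_model) shaped inputs.
    layer = TransformerEncoderLayer(d_model=256, nhead=8, activation="gelu")
    layers = _get_clones(layer, 2)               # two independent (non-shared) copies
    src = torch.rand(100, 4, 256)                # 100 tokens, batch of 4
    pad = torch.zeros(4, 100, dtype=torch.bool)  # True would mark padded positions
    out = src
    for enc in layers:
        out = enc(out, src_key_padding_mask=pad)
    print(out.shape)                             # torch.Size([100, 4, 256])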