Yixiao Fang e4c4a81b56
[Feature] Support iTPN and HiViT (#1584)
* hivit added

* Update hivit.py

* Update hivit.py

* Add files via upload

* Update __init__.py

* Add files via upload

* Update __init__.py

* Add files via upload

* Update hivit.py

* Add files via upload

* Add files via upload

* Add files via upload

* Add files via upload

* Update itpn.py

* Add files via upload

* Update __init__.py

* Update mae_hivit-base-p16.py

* Delete mim_itpn-base-p16.py

* Add files via upload

* Update itpn_hivit-base-p16.py

* Update itpn.py

* Update hivit.py

* Update __init__.py

* Update mae.py

* Delete hivit.py

* Update __init__.py

* Delete configs/itpn directory

* Add files via upload

* Add files via upload

* Delete configs/hivit directory

* Add files via upload

* refactor and add metafile and readme

* update clip

* add ut

* update ut

* update

* update docstring

* update model.rst

---------

Co-authored-by: 田运杰 <48153283+sunsmarterjie@users.noreply.github.com>
2023-05-26 12:08:34 +08:00

57 lines
1.7 KiB
Python

# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Optional, Union
import torch
import torch.nn as nn
from mmengine.model import BaseModule
from mmpretrain.registry import MODELS
@MODELS.register_module()
class iTPNClipHead(BaseModule):
    """Head for iTPN Pre-training using Clip.

    Projects the masked features to ``num_embed`` logits with a linear layer
    and feeds them, together with the masked targets, to the configured loss
    module.

    Args:
        embed_dims (int): The dimension of embedding.
        num_embed (int): The number of classification types.
        loss (dict): The config of loss.
        init_cfg (dict or List[dict], optional): Initialization config dict.
            Defaults to ``dict(type='TruncNormal', layer='Linear', std=0.02,
            bias=0)``.
    """

    def __init__(
        self,
        embed_dims: int,
        num_embed: int,
        loss: dict,
        init_cfg: Optional[Union[dict, List[dict]]] = dict(
            type='TruncNormal', layer='Linear', std=0.02, bias=0)
    ) -> None:
        super().__init__(init_cfg=init_cfg)
        # Linear classifier mapping each selected token to ``num_embed``
        # logits.
        self.cls_head = nn.Linear(embed_dims, num_embed)
        # Loss module built from the registry config.
        self.loss_module = MODELS.build(loss)

    def loss(self, feats: torch.Tensor, target: torch.Tensor,
             mask: torch.Tensor) -> torch.Tensor:
        """Generate loss.

        Args:
            feats (torch.Tensor): Features from backbone.
            target (torch.Tensor): Target generated by target_generator.
            mask (torch.Tensor): Generated mask for pretraining.

        Returns:
            torch.Tensor: The value returned by the configured loss module
            for the masked positions.
        """
        # Follow the device of ``feats`` instead of hard-coding CUDA so the
        # head also runs on CPU-only hosts and on non-default accelerators.
        mask = mask.to(feats.device, non_blocking=True)
        # Collapse everything after the batch dim and use it as a boolean
        # selector for both the targets and the features.
        mask = mask.flatten(1).to(torch.bool)
        target = target[mask]

        # NOTE(review): no cls_token is stripped from ``feats`` here, so
        # ``feats`` is presumably already token-aligned with the flattened
        # mask -- confirm against the backbone output.
        logits = self.cls_head(feats[mask])
        loss = self.loss_module(logits, target)
        return loss