Mirror of https://github.com/alibaba/EasyCV.git, synced 2025-06-03 14:49:00 +08:00

Merge branch 'master_github'
Commit 608949b936

.gitignore (vendored, 3 lines changed)
@@ -137,6 +137,3 @@ pai_jobs/easycv/resources/
*.tar.gz
thirdparty/test
scripts/test

# easycv default cache dir
.easycv_cache
@@ -10,7 +10,7 @@ oss_io_config = dict(
buckets=['your oss buckets'])

# model settings
# 1920: merge 4 layers of features, open models/backbones/vit_transfomer_dynamic.py:311: self.forward_return_n_last_blocks
# 1920: merge 4 layers of features, open models/backbones/vit_transformer_dynamic.py:311: self.forward_return_n_last_blocks
# 384: default
feature_num = 1920
model = dict(
@@ -157,3 +157,6 @@ checkpoint_config = dict(interval=10)

# runtime settings
total_epochs = 50

# export config
export = dict(export_neck=True)
@@ -10,7 +10,7 @@ oss_io_config = dict(
buckets=['your oss buckets'])

# model settings
# 1920: merge 4 layers of features, open models/backbones/vit_transfomer_dynamic.py:311: self.forward_return_n_last_blocks
# 1920: merge 4 layers of features, open models/backbones/vit_transformer_dynamic.py:311: self.forward_return_n_last_blocks
# 384: default
feature_num = 1920
model = dict(
@@ -15,6 +15,7 @@ from mmcv.runner import get_dist_info, init_dist, load_checkpoint
from easycv.apis import set_random_seed
from easycv.datasets import build_dataloader, build_dataset
from easycv.file import io
from easycv.framework.errors import ValueError
from easycv.models import build_model
from easycv.utils.collect import dist_forward_collect, nondist_forward_collect
from easycv.utils.config_tools import mmcv_config_fromfile
@@ -3,6 +3,8 @@ import argparse

import torch

from easycv.framework.errors import ValueError


def parse_args():
parser = argparse.ArgumentParser(
@@ -24,7 +26,7 @@ def main():
output_dict['state_dict'][key[9:]] = value
has_backbone = True
if not has_backbone:
raise Exception('Cannot find a backbone module in the checkpoint.')
raise ValueError('Cannot find a backbone module in the checkpoint.')
torch.save(output_dict, args.output)
@@ -2,11 +2,12 @@
import argparse
import os
import shutil
import sys
import time

import torch

from easycv.framework.errors import ValueError

args = argparse.ArgumentParser(description='Process some integers.')
args.add_argument(
'model_path',
@@ -88,7 +89,7 @@ def extract_model(model_path):
output_dict['state_dict'][key[9:]] = value
has_backbone = True
if not has_backbone:
raise Exception('Cannot find a backbone module in the checkpoint.')
raise ValueError('Cannot find a backbone module in the checkpoint.')
torch.save(output_dict, backbone_file)

return backbone_file
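Both hunks above only swap the raised error type; the surrounding loop keeps checkpoint entries whose key starts with the 'backbone.' prefix and strips those nine characters. A minimal standalone sketch of that extraction step (the paths and output dict layout are illustrative, not the exact script):

```python
import torch

def extract_backbone(ckpt_path, out_path):
    ckpt = torch.load(ckpt_path, map_location='cpu')
    output_dict = dict(state_dict=dict())
    has_backbone = False
    for key, value in ckpt['state_dict'].items():
        if key.startswith('backbone.'):
            # key[9:] drops the 'backbone.' prefix, as in the scripts above
            output_dict['state_dict'][key[9:]] = value
            has_backbone = True
    if not has_backbone:
        raise ValueError('Cannot find a backbone module in the checkpoint.')
    torch.save(output_dict, out_path)
```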
@@ -86,3 +86,13 @@ checkpoint_config = dict(interval=10)

# runtime settings
total_epochs = 100

predict = dict(
type='ClassificationPredictor',
pipelines=[
dict(type='Resize', size=256),
dict(type='CenterCrop', size=224),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Collect', keys=['img'])
])
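The predict block above attaches a preprocessing pipeline to a ClassificationPredictor. A rough usage sketch, assuming the predictor class is exposed under easycv.predictors and accepts a checkpoint path plus a list of image paths (both paths below are hypothetical; check the predictor API of your EasyCV version):

```python
from easycv.predictors import ClassificationPredictor  # assumed import path

predictor = ClassificationPredictor('work_dirs/cls/epoch_100.pth')  # hypothetical checkpoint
outputs = predictor(['demo/cat.jpg'])  # hypothetical image; preprocessing follows the config above
print(outputs[0])
```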
configs/classification/imagenet/vit/deitiii_base_patch16_192.py (new file, 143 lines)
@@ -0,0 +1,143 @@
# from PIL import Image

_base_ = 'configs/base.py'

log_config = dict(
interval=10,
hooks=[dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')])

# model settings
model = dict(
type='Classification',
train_preprocess=['mixUp'],
pretrained=False,
mixup_cfg=dict(
mixup_alpha=0.8,
cutmix_alpha=1.0,
cutmix_minmax=None,
prob=1.0,
switch_prob=0.5,
mode='batch',
label_smoothing=0.0,
num_classes=1000),
backbone=dict(
type='VisionTransformer',
img_size=[192],
num_classes=1000,
patch_size=16,
embed_dim=768,
depth=12,
num_heads=12,
mlp_ratio=4,
qkv_bias=True,
drop_rate=0.,
drop_path_rate=0.2,
use_layer_scale=True),
head=dict(
type='ClsHead',
loss_config=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
loss_weight=1.0,
label_ceil=True),
with_fc=False,
use_num_classes=False))

data_train_list = 'data/imagenet1k/train.txt'
data_train_root = 'data/imagenet1k/train/'
data_test_list = 'data/imagenet1k/val.txt'
data_test_root = 'data/imagenet1k/val/'

dataset_type = 'ClsDataset'
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
three_augment_policies = [[
dict(type='PILGaussianBlur', prob=1.0, radius_min=0.1, radius_max=2.0),
], [
dict(type='Solarization', threshold=128),
], [
dict(type='Grayscale', num_output_channels=3),
]]
train_pipeline = [
dict(
type='RandomResizedCrop', size=192, scale=(0.08, 1.0),
interpolation=3),  # interpolation='bicubic'
dict(type='RandomHorizontalFlip'),
dict(type='MMAutoAugment', policies=three_augment_policies),
dict(type='ColorJitter', brightness=0.3, contrast=0.3, saturation=0.3),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Collect', keys=['img', 'gt_labels'])
]
size = int((256 / 224) * 192)
test_pipeline = [
dict(type='Resize', size=size, interpolation=3),
dict(type='CenterCrop', size=192),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Collect', keys=['img', 'gt_labels'])
]

data = dict(
imgs_per_gpu=256,
workers_per_gpu=8,
use_repeated_augment_sampler=True,
train=dict(
type=dataset_type,
data_source=dict(
list_file=data_train_list,
root=data_train_root,
type='ClsSourceImageList'),
pipeline=train_pipeline),
val=dict(
type=dataset_type,
data_source=dict(
list_file=data_test_list,
root=data_test_root,
type='ClsSourceImageList'),
pipeline=test_pipeline))

eval_config = dict(initial=True, interval=1, gpu_collect=True)
eval_pipelines = [
dict(
mode='test',
data=data['val'],
dist_eval=True,
evaluators=[dict(type='ClsEvaluator', topk=(1, 5))],
)
]

# additional hooks
custom_hooks = []

# optimizer
optimizer = dict(
type='Lamb',
lr=0.003,
weight_decay=0.05,
eps=1e-8,
paramwise_options={
'cls_token': dict(weight_decay=0.),
'pos_embed': dict(weight_decay=0.),
'bias': dict(weight_decay=0.),
'norm': dict(weight_decay=0.),
'gamma_1': dict(weight_decay=0.),
'gamma_2': dict(weight_decay=0.),
})
optimizer_config = dict(grad_clip=None, update_interval=1)

lr_config = dict(
policy='CosineAnnealingWarmupByEpoch',
by_epoch=True,
min_lr_ratio=0.00001 / 0.003,
warmup='linear',
warmup_by_epoch=True,
warmup_iters=5,
warmup_ratio=0.000001 / 0.003,
)
checkpoint_config = dict(interval=10)

# runtime settings
total_epochs = 800

ema = dict(decay=0.99996)
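A quick way to sanity-check the new file is to load it with mmcv_config_fromfile, the helper imported earlier in this commit's tools scripts; a minimal sketch, assuming the repository root as the working directory:

```python
from easycv.utils.config_tools import mmcv_config_fromfile

cfg = mmcv_config_fromfile(
    'configs/classification/imagenet/vit/deitiii_base_patch16_192.py')
print(cfg.model.backbone.embed_dim)          # 768
print(cfg.optimizer.type, cfg.total_epochs)  # Lamb 800
```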
@@ -0,0 +1,17 @@
_base_ = './deitiii_base_patch16_192.py'
# model settings
model = dict(
type='Classification',
backbone=dict(
type='VisionTransformer',
img_size=[192],
num_classes=1000,
patch_size=16,
embed_dim=768,
depth=12,
num_heads=12,
mlp_ratio=4,
qkv_bias=True,
drop_rate=0.,
drop_path_rate=0.2,
use_layer_scale=True))
@@ -0,0 +1,17 @@
_base_ = './deitiii_base_patch16_192.py'
# model settings
model = dict(
type='Classification',
backbone=dict(
type='VisionTransformer',
img_size=[192],
num_classes=1000,
patch_size=16,
embed_dim=1024,
depth=24,
num_heads=16,
mlp_ratio=4,
qkv_bias=True,
drop_rate=0.,
drop_path_rate=0.45,
use_layer_scale=True))
@@ -0,0 +1,86 @@
_base_ = './deitiii_base_patch16_192.py'
# model settings
model = dict(
type='Classification',
backbone=dict(
type='VisionTransformer',
img_size=[224],
num_classes=1000,
patch_size=16,
embed_dim=384,
depth=12,
num_heads=6,
mlp_ratio=4,
qkv_bias=True,
drop_rate=0.,
drop_path_rate=0.05,
use_layer_scale=True))

data_train_list = 'data/imagenet1k/train.txt'
data_train_root = 'data/imagenet1k/train/'
data_test_list = 'data/imagenet1k/val.txt'
data_test_root = 'data/imagenet1k/val/'

dataset_type = 'ClsDataset'
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
three_augment_policies = [[
dict(type='PILGaussianBlur', prob=1.0, radius_min=0.1, radius_max=2.0),
], [
dict(type='Solarization', threshold=128),
], [
dict(type='Grayscale', num_output_channels=3),
]]
train_pipeline = [
dict(
type='RandomResizedCrop', size=224, scale=(0.08, 1.0),
interpolation=3),  # interpolation='bicubic'
dict(type='RandomHorizontalFlip'),
dict(type='MMAutoAugment', policies=three_augment_policies),
dict(type='ColorJitter', brightness=0.3, contrast=0.3, saturation=0.3),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Collect', keys=['img', 'gt_labels'])
]
test_pipeline = [
dict(type='Resize', size=256, interpolation=3),
dict(type='CenterCrop', size=224),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Collect', keys=['img', 'gt_labels'])
]

data = dict(
imgs_per_gpu=256,
workers_per_gpu=8,
use_repeated_augment_sampler=True,
train=dict(
type=dataset_type,
data_source=dict(
list_file=data_train_list,
root=data_train_root,
type='ClsSourceImageList'),
pipeline=train_pipeline),
val=dict(
type=dataset_type,
data_source=dict(
list_file=data_test_list,
root=data_test_root,
type='ClsSourceImageList'),
pipeline=test_pipeline))

eval_pipelines = [
dict(
mode='test',
data=data['val'],
dist_eval=True,
evaluators=[dict(type='ClsEvaluator', topk=(1, 5))],
)
]

# optimizer
optimizer = dict(lr=0.004)

lr_config = dict(
min_lr_ratio=0.00001 / 0.004,
warmup_ratio=0.000001 / 0.004,
)
@@ -101,13 +101,15 @@ val_dataset = dict(
pipeline=test_pipeline)

data = dict(
imgs_per_gpu=1, workers_per_gpu=2, train=train_dataset, val=val_dataset)
imgs_per_gpu=4, workers_per_gpu=2, train=train_dataset, val=val_dataset
)  # 64(total batch size) = 4 (batch size/per gpu) x 8 (gpu num) x 2(node)

# evaluation
eval_config = dict(interval=1, gpu_collect=False)
eval_config = dict(initial=False, interval=1, gpu_collect=False)
eval_pipelines = [
dict(
mode='test',
# dist_eval=True,
evaluators=[
dict(type='CocoDetectionEvaluator', classes=CLASSES),
],
@@ -101,13 +101,15 @@ val_dataset = dict(
pipeline=test_pipeline)

data = dict(
imgs_per_gpu=1, workers_per_gpu=2, train=train_dataset, val=val_dataset)
imgs_per_gpu=4, workers_per_gpu=2, train=train_dataset, val=val_dataset
)  # 64(total batch size) = 4 (batch size/per gpu) x 8 (gpu num) x 2(node)

# evaluation
eval_config = dict(interval=1, gpu_collect=False)
eval_config = dict(initial=False, interval=1, gpu_collect=False)
eval_pipelines = [
dict(
mode='test',
# dist_eval=True,
evaluators=[
dict(type='CocoDetectionEvaluator', classes=CLASSES),
dict(type='CocoMaskEvaluator', classes=CLASSES)
@@ -1,3 +0,0 @@
_base_ = './vitdet_100e.py'

model = dict(backbone=dict(aggregation='basicblock'))
@@ -1,3 +0,0 @@
_base_ = './vitdet_100e.py'

model = dict(backbone=dict(aggregation='bottleneck'))
configs/detection/vitdet/vitdet_cascade_mask_rcnn.py (new file, 231 lines)
@@ -0,0 +1,231 @@
# model settings

norm_cfg = dict(type='GN', num_groups=1, eps=1e-6, requires_grad=True)

pretrained = 'https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-b-1600/warpper_mae_vit-base-p16-1600e.pth'
model = dict(
type='CascadeRCNN',
pretrained=pretrained,
backbone=dict(
type='ViTDet',
img_size=1024,
patch_size=16,
embed_dim=768,
depth=12,
num_heads=12,
drop_path_rate=0.1,
window_size=14,
mlp_ratio=4,
qkv_bias=True,
window_block_indexes=[
# 2, 5, 8 11 for global attention
0,
1,
3,
4,
6,
7,
9,
10,
],
residual_block_indexes=[],
use_rel_pos=True),
neck=dict(
type='SFP',
in_channels=768,
out_channels=256,
scale_factors=(4.0, 2.0, 1.0, 0.5),
norm_cfg=norm_cfg,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
num_convs=2,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
roi_head=dict(
type='CascadeRoIHead',
num_stages=3,
stage_loss_weights=[1, 0.5, 0.25],
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=[
dict(
type='Shared4Conv1FCBBoxHead',
conv_out_channels=256,
norm_cfg=norm_cfg,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=True,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
loss_weight=1.0)),
dict(
type='Shared4Conv1FCBBoxHead',
conv_out_channels=256,
norm_cfg=norm_cfg,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.05, 0.05, 0.1, 0.1]),
reg_class_agnostic=True,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
loss_weight=1.0)),
dict(
type='Shared4Conv1FCBBoxHead',
conv_out_channels=256,
norm_cfg=norm_cfg,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.033, 0.033, 0.067, 0.067]),
reg_class_agnostic=True,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
],
mask_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
mask_head=dict(
type='FCNMaskHead',
norm_cfg=norm_cfg,
num_convs=4,
in_channels=256,
conv_out_channels=256,
num_classes=80,
loss_mask=dict(
type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
# model training and testing settings
train_cfg=dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=0,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_pre=2000,
max_per_img=2000,
nms=dict(type='nms', iou_threshold=0.7),
min_bbox_size=0),
rcnn=[
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.6,
neg_iou_thr=0.6,
min_pos_iou=0.6,
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.7,
min_pos_iou=0.7,
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False)
]),
test_cfg=dict(
rpn=dict(
nms_pre=1000,
max_per_img=1000,
nms=dict(type='nms', iou_threshold=0.7),
min_bbox_size=0),
rcnn=dict(
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.5),
max_per_img=100,
mask_thr_binary=0.5)))

mmlab_modules = [
dict(type='mmdet', name='CascadeRCNN', module='model'),
dict(type='mmdet', name='RPNHead', module='head'),
dict(type='mmdet', name='CascadeRoIHead', module='head'),
]
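In the config above, the comment next to window_block_indexes ('2, 5, 8 11 for global attention') means that of the 12 ViT blocks, blocks 2, 5, 8 and 11 use global attention and all the others use 14x14 windowed attention. A one-line sketch reproducing the listed indexes:

```python
global_attn_blocks = (2, 5, 8, 11)
window_block_indexes = [i for i in range(12) if i not in global_attn_blocks]
print(window_block_indexes)  # [0, 1, 3, 4, 6, 7, 9, 10]
```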
@@ -0,0 +1,4 @@
_base_ = [
'./vitdet_cascade_mask_rcnn.py', './lsj_coco_instance.py',
'./vitdet_schedule_100e.py'
]
@@ -1,6 +1,6 @@
# model settings

norm_cfg = dict(type='GN', num_groups=1, requires_grad=True)
norm_cfg = dict(type='GN', num_groups=1, eps=1e-6, requires_grad=True)

pretrained = 'https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-b-1600/warpper_mae_vit-base-p16-1600e.pth'
model = dict(
@@ -9,22 +9,32 @@ model = dict(
backbone=dict(
type='ViTDet',
img_size=1024,
patch_size=16,
embed_dim=768,
depth=12,
num_heads=12,
drop_path_rate=0.1,
window_size=14,
mlp_ratio=4,
qkv_bias=True,
qk_scale=None,
drop_rate=0.,
attn_drop_rate=0.,
drop_path_rate=0.1,
use_abs_pos_emb=True,
aggregation='attn',
),
window_block_indexes=[
# 2, 5, 8 11 for global attention
0,
1,
3,
4,
6,
7,
9,
10,
],
residual_block_indexes=[],
use_rel_pos=True),
neck=dict(
type='SFP',
in_channels=[768, 768, 768, 768],
in_channels=768,
out_channels=256,
scale_factors=(4.0, 2.0, 1.0, 0.5),
norm_cfg=norm_cfg,
num_outs=5),
rpn_head=dict(
@@ -32,7 +42,6 @@ model = dict(
in_channels=256,
feat_channels=256,
num_convs=2,
norm_cfg=norm_cfg,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
@@ -98,7 +107,7 @@ model = dict(
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=True,
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
@@ -1,4 +1,4 @@
_base_ = [
'./vitdet_faster_rcnn.py', './lsj_coco_detection.py',
'./vitdet_faster_rcnn.py', './lsj_coco_instance.py',
'./vitdet_schedule_100e.py'
]
@@ -1,6 +1,6 @@
# model settings

norm_cfg = dict(type='GN', num_groups=1, requires_grad=True)
norm_cfg = dict(type='GN', num_groups=1, eps=1e-6, requires_grad=True)

pretrained = 'https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-b-1600/warpper_mae_vit-base-p16-1600e.pth'
model = dict(
@@ -9,22 +9,32 @@ model = dict(
backbone=dict(
type='ViTDet',
img_size=1024,
patch_size=16,
embed_dim=768,
depth=12,
num_heads=12,
drop_path_rate=0.1,
window_size=14,
mlp_ratio=4,
qkv_bias=True,
qk_scale=None,
drop_rate=0.,
attn_drop_rate=0.,
drop_path_rate=0.1,
use_abs_pos_emb=True,
aggregation='attn',
),
window_block_indexes=[
# 2, 5, 8 11 for global attention
0,
1,
3,
4,
6,
7,
9,
10,
],
residual_block_indexes=[],
use_rel_pos=True),
neck=dict(
type='SFP',
in_channels=[768, 768, 768, 768],
in_channels=768,
out_channels=256,
scale_factors=(4.0, 2.0, 1.0, 0.5),
norm_cfg=norm_cfg,
num_outs=5),
rpn_head=dict(
@@ -32,7 +42,6 @@ model = dict(
in_channels=256,
feat_channels=256,
num_convs=2,
norm_cfg=norm_cfg,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
@@ -112,7 +121,7 @@ model = dict(
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=True,
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
@@ -1,26 +1,29 @@
_base_ = 'configs/base.py'

log_config = dict(
interval=200,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])

checkpoint_config = dict(interval=10)

# optimizer
paramwise_options = {
'norm': dict(weight_decay=0.),
'bias': dict(weight_decay=0.),
'pos_embed': dict(weight_decay=0.),
'cls_token': dict(weight_decay=0.)
}
optimizer = dict(
type='AdamW',
lr=1e-4,
betas=(0.9, 0.999),
weight_decay=0.1,
paramwise_options=paramwise_options)
optimizer_config = dict(grad_clip=None, loss_scale=512.)
constructor='LayerDecayOptimizerConstructor',
paramwise_options=dict(num_layers=12, layer_decay_rate=0.7))
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=250,
warmup_ratio=0.067,
warmup_ratio=0.001,
step=[88, 96])
total_epochs = 100
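The new optimizer settings above switch to LayerDecayOptimizerConstructor with num_layers=12 and layer_decay_rate=0.7, i.e. layer-wise learning-rate decay for the ViT backbone. Under the usual convention the per-layer scale falls off geometrically with distance from the head, roughly layer_decay_rate ** (num_layers - layer_id); a small sketch of that schedule (the exact exponent convention used by the constructor may differ):

```python
base_lr = 1e-4            # from the AdamW optimizer above
num_layers = 12
layer_decay_rate = 0.7

# Earliest blocks take the smallest steps, the last block trains near base_lr.
for layer_id in range(num_layers + 1):
    scale = layer_decay_rate ** (num_layers - layer_id)
    print(f'layer {layer_id:2d}: lr = {base_lr * scale:.2e}')
```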
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ee64c0caef841c61c7e6344b7fe2c07a38fba07a8de81ff38c0686c641e0a283
size 190356
oid sha256:c696a58a2963b5ac47317751f04ff45bfed4723f2f70bacf91eac711f9710e54
size 189432
@@ -156,7 +156,7 @@ easycv.models.backbones.swin\_transformer\_dynamic module
easycv.models.backbones.vit\_transfomer\_dynamic module
-------------------------------------------------------

.. automodule:: easycv.models.backbones.vit_transfomer_dynamic
.. automodule:: easycv.models.backbones.vit_transformer_dynamic
:members:
:undoc-members:
:show-inheritance:
@@ -21,6 +21,9 @@
| hrnetw64 | [hrnetw64](https://github.com/alibaba/EasyCV/tree/master/configs/classification/imagenet/hrnet/imagenet_hrnetw64_jpg.py) | 79.884 | 95.04 | 5120 | 54.74 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/classification/resnet/hrnetw64/epoch_100.pth) |
| vit-base-patch16 | [vit-base-patch16](https://github.com/alibaba/EasyCV/tree/master/configs/classification/imagenet/vit/imagenet_vit_base_patch16_224_jpg.py) | 76.082 | 92.026 | 346 | 8.03 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/classification/vit/vit-base-patch16/epoch_300.pth) |
| swin-tiny-patch4-window7 | [swin-tiny-patch4-window7](https://github.com/alibaba/EasyCV/tree/master/configs/classification/imagenet/swint/imagenet_swin_tiny_patch4_window7_224_jpg.py) | 80.528 | 94.822 | 132 | 12.94 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/classification/swint/swin-tiny-patch4-window7/epoch_300.pth) |
| deitiii-small-patch16-224 | [deitiii-small-patch16-224](https://github.com/alibaba/EasyCV/tree/master/configs/classification/imagenet/vit/imagenet_deitiii_small_patch16_224_jpg.py) | 81.408 | 95.388 | 89 | 4.53 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/classification/deitiii/imagenet_deitiii_small_patch16_224/deitiii_small.pth) |
| deitiii-base-patch16-192 | [deitiii-base-patch16-192](https://github.com/alibaba/EasyCV/tree/master/configs/classification/imagenet/vit/imagenet_deitiii_base_patch16_192_jpg.py) | 82.982 | 95.95 | 337 | 4.63 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/classification/deitiii/imagenet_deitiii_base_patch16_192/deitiii_base.pth) |
| deitiii-large-patch16-192 | [deitiii-large-patch16-192](https://github.com/alibaba/EasyCV/tree/master/configs/classification/imagenet/vit/imagenet_deitiii_large_patch16_192_jpg.py) | 83.902 | 96.296 | 1170 | 10.17 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/classification/deitiii/imagenet_deitiii_large_patch16_192/deitiii_large.pth) |

(ps: results are from models trained with EasyCV; the inference input size defaults to 224 and the benchmark machine to a V100 16G; "gpu memory" records the GPU peak memory)
@@ -6,38 +6,37 @@ Inference default use V100 16G.

Pretrained on COCO2017 dataset. (The result has been optimized with PAI-Blade, and only computes the model inference time. To learn about end2end inference time, you can refer to [export.md](./tutorials/export.md).)

| Algorithm | Config | Params | Speed<sup>V100<br/><sub>fp16 b32 </sub> | mAP<sup>val<br/><sub>0.5:0.95</sub> | AP<sup>val<br/><sub>50</sub> | Download |
| --- | --- | --- | --- | --- | --- | --- |
| YOLOX-s | [yolox_s_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_s_8xb16_300e_coco.py) | 9M | 0.68ms | 40.0 | 58.9 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_s_bs16_lr002/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_s_bs16_lr002/log.txt) |
| PAI-YOLOXs | [yoloxs_pai_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/pai_yoloxs_8xb16_300e_coco.py) | 16M | 0.71ms | 41.4 | 60.0 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox-pai/model/pai_yoloxs.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox-pai/log/pai_yoloxs.json) |
| PAI-YOLOXs-ASFF | [yoloxs_pai_asff_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/pai_yoloxs_asff_8xb16_300e_coco.py) | 21M | 0.87ms | 42.8 | 61.8 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox-pai/model/pai_yoloxs_asff.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox-pai/log/pai_yoloxs_asff.json) |
| PAI-YOLOXs-ASFF-TOOD3 | [yoloxs_pai_asff_tood3_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/pai_yoloxs_asff_tood3_8xb16_300e_coco.py) | 24M | 1.15ms | 43.9 | 62.1 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox-pai/model/pai_yoloxs_asff_tood3.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox-pai/log/pai_yoloxs_asff_tood3.json) |
| YOLOX-m | [yolox_m_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_m_8xb16_300e_coco.py) | 25M | 1.52ms | 46.3 | 64.9 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_m_bs16_lr002/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_m_bs16_lr002/log.txt) |
| YOLOX-l | [yolox_l_8xb8_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_m_8xb8_300e_coco.py) | 54M | 2.47ms | 48.9 | 67.5 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_l_bs8_lr001/epoch_290.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_l_bs8_lr001/log.txt) |
| YOLOX-x | [yolox_x_8xb8_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_x_8xb8_300e_coco.py) | 99M | 4.74ms | 50.9 | 69.2 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_x_bs8_lr001/epoch_290.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_x_bs8_lr001/log.txt) |
| YOLOX-tiny | [yolox_tiny_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_tiny_8xb16_300e_coco.py) | 5M | 0.28ms | 31.5 | 49.2 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_tiny_bs16_lr002/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_tiny_bs16_lr002/log.txt) |
| YOLOX-nano | [yolox_nano_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_tiny_8xb16_300e_coco.py) | 2.2M | 0.19ms | 26.5 | 42.6 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_nano_bs16_lr002/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_nano_bs16_lr002/log.txt) |
## ViTDet

| Algorithm | Config | Params<br/>(backbone/total) | inference time(V100)<br/>(ms/img) | bbox_mAP<sup>val<br/><sub>0.5:0.95</sub> | mask_mAP<sup>val<br/><sub>0.5:0.95</sub> | Download |
| --- | --- | --- | --- | --- | --- | --- |
| ViTDet_MaskRCNN | [vitdet_maskrcnn](https://github.com/alibaba/EasyCV/tree/master/configs/detection/vitdet/vitdet_100e.py) | 88M/118M | 163ms | 50.57 | 44.96 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/vitdet/vit_base/vitdet_maskrcnn.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/vitdet/vit_base/vitdet_maskrcnn.log.json) |

| Algorithm | Config | Params<br/>(backbone/total) | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | bbox_mAP<sup>val<br/><sub>0.5:0.95</sub> | mask_mAP<sup>val<br/><sub>0.5:0.95</sub> | Download |
| --- | --- | --- | --- | --- | --- | --- | --- |
| ViTDet_MaskRCNN | [vitdet_maskrcnn](https://github.com/alibaba/EasyCV/tree/master/configs/detection/vitdet/vitdet_mask_rcnn_100e.py) | 86M/111M | 13.3 (fp16) | 138ms | 50.65 | 45.41 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/vitdet/vit_base/epoch_100.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/vitdet/vit_base/20220901_135827.log.json) |

## FCOS

| Algorithm | Config | Params<br/>(backbone/total) | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | mAP<sup>val<br/><sub>0.5:0.95</sub> | AP<sup>val<br/><sub>50</sub> | Download |
| --- | --- | --- | --- | --- | --- | --- | --- |
| FCOS-r50(caffe) | [fcos-r50](https://github.com/alibaba/EasyCV/tree/master/configs/detection/fcos/fcos_r50_caffe_1x_coco.py) | 23M/32M | 5.0 | 85.8ms | 38.58 | 57.18 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/fcos/epoch_12.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/fcos/20220621_121315.log.json) |
| FCOS-r50(torch) | [fcos-r50](https://github.com/alibaba/EasyCV/tree/master/configs/detection/fcos/fcos_r50_torch_1x_coco.py) | 23M/32M | 4.0 (fp16) | 105.3ms | 38.88 | 58.01 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/fcos/fcos_epoch_12.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/fcos/20220826_182628.log.json) |
## DETR

| Algorithm | Config | Params<br/>(backbone/total) | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | bbox_mAP<sup>val<br/><sub>0.5:0.95</sub> | AP<sup>val<br/><sub>50</sub> | Download |
| --- | --- | --- | --- | --- | --- | --- | --- |
| DETR-r50 | [detr-r50](https://github.com/alibaba/EasyCV/tree/master/configs/detection/detr/detr_r50_8x2_150e_coco.py) | 23M/41M | 8.5 | 48.5ms | 39.92 | 60.52 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/detr/epoch_150.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/detr/20220609_101243.log.json) |
| DAB-DETR-r50 | [dab-detr-r50](https://github.com/alibaba/EasyCV/tree/master/configs/detection/dab_detr/dab_detr_r50_8x2_50e_coco.py) | 23M/43M | 2.6 | 58.5ms | 42.52 | 63.03 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/dab_detr/dab_detr_epoch_50.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/dab_detr/20220610_122811.log.json) |
| DN-DETR-r50 | [dab-detr-r50](https://github.com/alibaba/EasyCV/tree/master/configs/detection/dab_detr/dn_detr_r50_8x2_50e_coco.py) | 23M/43M | 7.8 | 58.5ms | 44.39 | 64.66 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/dn_detr/dn_detr_epoch_50.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/dn_detr/20220713_105127.log.json) |

## DINO
@@ -4,29 +4,29 @@

Pretrained on **Pascal VOC 2012 + Aug**.

| Algorithm | Config | Params<br/>(backbone/total) | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | mIoU | Download |
| --- | --- | --- | --- | --- | --- | --- |
| fcn_r50_d8 | [fcn_r50-d8_512x512_8xb4_60e_voc12aug](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/fcn/fcn_r50-d8_512x512_8xb4_60e_voc12aug.py) | 23M/49M | 19.8 | 166ms | 69.01 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/fcn_r50/epoch_60.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/fcn_r50/20220525_203606.log.json) |

## UperNet

Pretrained on **Pascal VOC 2012 + Aug**.

| Algorithm | Config | Params<br/>(backbone/total) | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | mIoU | Download |
| --- | --- | --- | --- | --- | --- | --- |
| upernet_r50 | [upernet_r50_512x512_8xb4_60e_voc12aug](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/upernet/upernet_r50_512x512_8xb4_60e_voc12aug.py) | 23M/66M | 5.5 | 282.9ms | 76.59 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/upernet_r50/epoch_60.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/upernet_r50/20220706_114712.log.json) |

## Mask2former

### Instance Segmentation on COCO
| Algorithm | Config | Train memory<br/>(GB) | box MAP | Mask mAP | Download |
| --- | --- | --- | --- | --- | --- |
| mask2former_r50 | [mask2former_r50_8xb2_e50_instance](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/mask2former/mask2former_r50_8xb2_e50_instance.py) | 18.8 | 46.09 | 43.26 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/mask2former_r50_instance/epoch_50.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/mask2former_r50_instance/20220620_113639.log.json) |

### Panoptic Segmentation on COCO
| Algorithm | Config | Train memory<br/>(GB) | PQ | box MAP | Mask mAP | Download |
| --- | --- | --- | --- | --- | --- | --- |
| mask2former_r50 | [mask2former_r50_8xb2_e50_panopatic](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/mask2former/mask2former_r50_8xb2_e50_panopatic.py) | 18.8 | 51.64 | 44.81 | 41.88 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/mask2former_r50_panoptic/epoch_50.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/mask2former_r50_panoptic/20220629_170721.log.json) |


## SegFormer
@ -5,19 +5,19 @@
|
||||
|
||||
Pretrained on **ImageNet** dataset.
|
||||
|
||||
| Config | Backbone | Params<br>(backbone/total) | Flops | inference time(V100)<br>(ms/img) | Epochs | Download |
|
||||
| ------------------------------------------------------------ | -------- | -------------------------- | ----- | -------------------------------- | ------ | ------------------------------------------------------------ |
|
||||
| [mae_vit_base_patch16_8xb64_400e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/mae/mae_vit_base_patch16_8xb64_400e.py) | ViT-B/16 | 85M/111M | 9.8G | 8.03 | 400 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-b-400/pretrain_400.pth) |
|
||||
| [mae_vit_base_patch16_8xb64_1600e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/mae/mae_vit_base_patch16_8xb64_1600e.py) | ViT-B/16 | 85M/111M | 9.8G | 8.03 | 1600 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-b-1600/pretrain_1600.pth) |
|
||||
| [mae_vit_large_patch16_8xb32_1600e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/mae/mae_vit_large_patch16_8xb32_1600e.py) | ViT-L/16 | 303M/329M | 20.8G | 16.30 | 1600 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-l-1600/pretrain_1600.pth) |
|
||||
| Config | Backbone | Params<br>(backbone/total) | Train memory<br/>(GB) | Flops | inference time(V100)<br>(ms/img) | Epochs | Download |
|
||||
| ------------------------------------------------------------ | -------- | -------------------------- | ------------------ | ----- | -------------------------------- | ------ | ------------------------------------------------------------ |
|
||||
| [mae_vit_base_patch16_8xb64_400e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/mae/mae_vit_base_patch16_8xb64_400e.py) | ViT-B/16 | 85M/111M | 9.5 | 9.8G | 8.03 | 400 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-b-400/pretrain_400.pth) |
|
||||
| [mae_vit_base_patch16_8xb64_1600e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/mae/mae_vit_base_patch16_8xb64_1600e.py) | ViT-B/16 | 85M/111M | 9.5 | 9.8G | 8.03 | 1600 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-b-1600/pretrain_1600.pth) |
|
||||
| [mae_vit_large_patch16_8xb32_1600e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/mae/mae_vit_large_patch16_8xb32_1600e.py) | ViT-L/16 | 303M/329M | 11.3 | 20.8G | 16.30 | 1600 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-l-1600/pretrain_1600.pth) |
|
||||
|
||||
### Fast ConvMAE

Pretrained on **ImageNet** dataset.

| Config | Backbone | Params<br/>(backbone/total) | Train memory<br/>(GB) | Flops | inference time(V100)<br/>(ms/img) | Total train time | Epochs | Download |
| ------ | -------- | --------------------------- | --------------------- | ----- | --------------------------------- | ---------------- | ------ | -------- |
| [fast_convmae_vit_base_patch16_8xb64_50e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/fast_convmae/fast_convmae_vit_base_patch16_8xb64_50e.py) | ConvViT-B/16 | 88M/115M | 30.3 | 45.1G | 6.88 | 20h<br/>(8*A100) | 50 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/FastConvMAE/pretrained/epoch_50.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/FastConvMAE/pretrained/20220617_110501.log.json) |

> The FLOPs of Fast ConvMAE are about four times those of MAE: MAE keeps only 25% of the tokens in each forward pass, while Fast ConvMAE uses a complementary masking strategy that splits the tokens into four disjoint 25% parts. Each forward pass therefore effectively learns from four masked views of the sample, achieving roughly four times the learning effect per pass; a minimal sketch of this masking scheme is shown below.
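The following is a minimal, illustrative sketch of the complementary masking idea described above. It is not the EasyCV implementation; the helper name and the 196-token example (a 14x14 patch grid) are made up for illustration.

```python
import torch

def complementary_masks(num_tokens: int, num_splits: int = 4, generator=None):
    """Split the token indices into `num_splits` disjoint, equally sized groups."""
    perm = torch.randperm(num_tokens, generator=generator)
    return [perm[i::num_splits] for i in range(num_splits)]

masks = complementary_masks(196)          # e.g. the 14x14 patch grid of a 224x224 image
assert sum(len(m) for m in masks) == 196  # the four 25% views cover every token exactly once
```

Because the four views are disjoint and jointly cover all tokens, one pass over them sees the whole image, which is where the roughly 4x learning signal (and 4x FLOPs) comes from.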
@ -25,34 +25,34 @@ Pretrained on **ImageNet** dataset.

Pretrained on **ImageNet** dataset.

| Config | Backbone | Params<br/>(backbone/total) | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | Epochs | Download |
| ------ | --------- | --------------------------- | --------------------- | --------------------------------- | ------ | -------- |
| [dino_deit_small_p16_8xb32_100e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/dino/dino_deit_small_p16_8xb32_100e_tfrecord.py) | DeiT-S/16 | 21M/88M | 10.5 | 6.17 | 100 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/dino_deit_small/epoch_100.pth) |

### MoBY

Pretrained on **ImageNet** dataset.

| Config | Backbone | Params<br/>(backbone/total) | Flops | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | Epochs | Download |
| ------ | --------- | --------------------------- | ----- | --------------------- | --------------------------------- | ------ | -------- |
| [moby_deit_small_p16_4xb128_300e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/moby/moby_deit_small_p16_4xb128_300e_tfrecord.py) | DeiT-S/16 | 21M/26M | 18.6G | 21.4 | 6.17 | 300 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/moby_deit_small_p16/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/moby_deit_small_p16/log.txt) |
| [moby_swin_tiny_8xb64_300e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/moby/moby_dynamic_swin_tiny_8xb64_300e_tfrecord.py) | Swin-T | 27M/33M | 18.1G | 16.1 | 9.74 | 300 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/moby_dynamic_swin_tiny/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/moby_dynamic_swin_tiny/log.txt) |

### MoCo V2

Pretrained on **ImageNet** dataset.

| Config | Backbone | Params<br/>(backbone/total) | Flops | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | Epochs | Download |
| ------ | -------- | --------------------------- | ----- | --------------------- | --------------------------------- | ------ | -------- |
| [mocov2_resnet50_8xb32_200e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/mocov2/mocov2_rn50_8xb32_200e_tfrecord.py) | ResNet50 | 23M/28M | 8.2G | 5.4 | 8.59 | 200 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mocov2_r50/epoch_200.pth) |

### SwAV

Pretrained on **ImageNet** dataset.

| Config | Backbone | Params<br/>(backbone/total) | Flops | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | Epochs | Download |
| ------ | -------- | --------------------------- | ----- | --------------------- | --------------------------------- | ------ | -------- |
| [swav_resnet50_8xb32_200e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/swav/swav_rn50_8xb32_200e_tfrecord.py) | ResNet50 | 23M/28M | 12.9G | 11.3 | 8.59 | 200 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/swav_r50/epoch_200.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/swav_r50/log.txt) |

## Benchmarks
@ -262,7 +262,7 @@
|
||||
"state_dict = torch.load(weight_path)['state_dict']\n",
|
||||
"state_dict_out = {}\n",
|
||||
"for key in state_dict:\n",
|
||||
" state_dict_out[key.replace('encoder.','')] = state_dict[key]\n",
|
||||
" state_dict_out['model.' + key.replace('encoder.','')] = state_dict[key]\n",
|
||||
"torch.save(state_dict_out,weight_path)"
|
||||
]
|
||||
},
|
||||
@ -324,7 +324,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!python -m torch.distributed.launch --nproc_per_node=1 --master_port=29930 \\\n",
|
||||
"/home/pai/lib/python3.6/site-packages/easycv/tools/train.py mae_vit_base_patch16_8xb64_100e_lrdecay065_fintune.py --work_dir work_dir/selfsup/jpg/mae --launcher pytorch"
|
||||
"/home/pai/lib/python3.6/site-packages/easycv/tools/train.py mae_vit_base_patch16_8xb64_100e_lrdecay065_fintune.py --work_dir work_dir/selfsup/jpg/mae_fintune --launcher pytorch"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -333,7 +333,56 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 预测\n",
|
||||
"参考EasyCV图像分类的demo,对训练好的模型导出并预测"
|
||||
"对训练好的模型导出并预测"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4271c852",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! python -m easycv.tools.export mae_vit_base_patch16_8xb64_100e_lrdecay065_fintune.py work_dir/selfsup/jpg/mae_fintune/ClsEvaluator_neck_top1_best.pth work_dir/selfsup/jpg/mae_fintune/best_export.pth"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2cc9e6fc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"下载测试图片和标签文件"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "973d5bd4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! wget http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/cifar10/qince_data/predict/aeroplane_s_000004.png\n",
|
||||
"! wget http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/release/doc/easycv/configs/selfsup/mae/label_map.txt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5a5a3632",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import cv2\n",
|
||||
"from easycv.predictors.classifier import TorchClassifier\n",
|
||||
"\n",
|
||||
"output_ckpt = 'work_dir/selfsup/jpg/mae_fintune/best_export.pth'\n",
|
||||
"tcls = TorchClassifier(output_ckpt, topk=1, label_map_path='label_map.txt')\n",
|
||||
"\n",
|
||||
"img = cv2.imread('aeroplane_s_000004.png')\n",
|
||||
"# input image should be RGB order\n",
|
||||
"img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n",
|
||||
"output = tcls.predict([img])\n",
|
||||
"print(output)"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
@ -13,6 +13,7 @@ import torchvision.transforms.functional as t_f
|
||||
from mmcv.utils import Config
|
||||
|
||||
from easycv.file import io
|
||||
from easycv.framework.errors import ValueError
|
||||
from easycv.models import (DINO, MOCO, SWAV, YOLOX, Classification, MoBY,
|
||||
build_model)
|
||||
from easycv.utils.checkpoint import load_checkpoint
|
||||
@ -106,6 +107,9 @@ def _export_cls(model, cfg, filename):
|
||||
backbone=replace_syncbn(cfg.model.backbone),
|
||||
)
|
||||
|
||||
# avoid load pretrained model
|
||||
model_config['pretrained'] = False
|
||||
|
||||
if export_neck:
|
||||
if hasattr(cfg.model, 'neck'):
|
||||
model_config['neck'] = cfg.model.neck
|
||||
|
@ -15,6 +15,7 @@ from mmcv.parallel import (MMDataParallel, MMDistributedDataParallel,
|
||||
from mmcv.runner import get_dist_info
|
||||
|
||||
from easycv.file import io
|
||||
from easycv.framework.errors import ValueError
|
||||
from easycv.utils.torchacc_util import is_torchacc_enabled
|
||||
|
||||
|
||||
|
@ -31,6 +31,7 @@ from easycv.core import standard_fields
|
||||
from easycv.core.evaluation import coco_tools
|
||||
from easycv.core.post_processing.nms import oks_nms, soft_oks_nms
|
||||
from easycv.core.standard_fields import DetectionResultFields, InputDataFields
|
||||
from easycv.framework.errors import KeyError, TypeError, ValueError
|
||||
from easycv.utils.json_utils import MyEncoder
|
||||
from .base_evaluator import Evaluator
|
||||
from .builder import EVALUATORS
|
||||
@ -365,7 +366,7 @@ class CocoDetectionEvaluator(Evaluator):
|
||||
def _check_mask_type_and_value(array_name, masks):
|
||||
"""Checks whether mask dtype is uint8 and the values are either 0 or 1."""
|
||||
if masks.dtype != np.uint8:
|
||||
raise ValueError('{} must be of type np.uint8. Found {}.'.format(
|
||||
raise TypeError('{} must be of type np.uint8. Found {}.'.format(
|
||||
array_name, masks.dtype))
|
||||
if np.any(np.logical_and(masks != 0, masks != 1)):
|
||||
raise ValueError(
|
||||
|
@ -3,6 +3,7 @@
|
||||
# https://github.com/open-mmlab/mmpose/blob/master/mmpose/datasets/datasets/base/kpt_2d_sview_rgb_img_top_down_dataset.py
|
||||
import numpy as np
|
||||
|
||||
from easycv.framework.errors import KeyError
|
||||
from .base_evaluator import Evaluator
|
||||
from .builder import EVALUATORS
|
||||
from .metric_registry import METRICS
|
||||
|
@ -1,6 +1,8 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
import inspect
|
||||
|
||||
from easycv.framework.errors import KeyError, TypeError
|
||||
|
||||
|
||||
class MetricRegistry(object):
|
||||
|
||||
|
@ -5,6 +5,7 @@ import numpy as np
|
||||
import torch
|
||||
from prettytable import PrettyTable
|
||||
|
||||
from easycv.framework.errors import KeyError
|
||||
from easycv.utils.logger import print_log
|
||||
from .base_evaluator import Evaluator
|
||||
from .builder import EVALUATORS
|
||||
|
@ -6,6 +6,7 @@ import cv2
|
||||
import numpy as np
|
||||
|
||||
from easycv.core.post_processing import transform_preds
|
||||
from easycv.framework.errors import ValueError
|
||||
|
||||
|
||||
def _calc_distances(preds, targets, mask, normalize):
|
||||
|
@ -4,6 +4,7 @@ import torch
|
||||
from torch.optim import *
|
||||
|
||||
from .builder import build_optimizer_constructor
|
||||
from .lamb import Lamb
|
||||
from .lars import LARS
|
||||
from .layer_decay_optimizer_constructor import LayerDecayOptimizerConstructor
|
||||
from .ranger import Ranger
|
||||
|
@ -8,6 +8,8 @@ from mmcv.runner.optimizer.builder import OPTIMIZERS
|
||||
from torch import Tensor
|
||||
from torch.optim import AdamW as _AdamW
|
||||
|
||||
from easycv.framework.errors import RuntimeError
|
||||
|
||||
|
||||
def adamw(params: List[Tensor], grads: List[Tensor], exp_avgs: List[Tensor],
|
||||
exp_avg_sqs: List[Tensor], max_exp_avg_sqs: List[Tensor],
|
||||
|
168
easycv/core/optimizer/lamb.py
Normal file
168
easycv/core/optimizer/lamb.py
Normal file
@ -0,0 +1,168 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
import math
|
||||
|
||||
import torch
|
||||
from mmcv.runner import OPTIMIZERS
|
||||
from torch.optim import Optimizer
|
||||
|
||||
from easycv.framework.errors import RuntimeError
|
||||
|
||||
|
||||
@OPTIMIZERS.register_module()
|
||||
class Lamb(Optimizer):
|
||||
"""A pure pytorch variant of FuseLAMB (NvLamb variant) optimizer.
|
||||
This class is copied from `timm`_. The LAMB was proposed in `Large Batch
|
||||
Optimization for Deep Learning - Training BERT in 76 minutes`_.
|
||||
.. _timm:
|
||||
https://github.com/rwightman/pytorch-image-models/blob/master/timm/optim/lamb.py
|
||||
.. _Large Batch Optimization for Deep Learning - Training BERT in 76 minutes:
|
||||
https://arxiv.org/abs/1904.00962
|
||||
Arguments:
|
||||
params (iterable): iterable of parameters to optimize or dicts defining
|
||||
parameter groups.
|
||||
lr (float, optional): learning rate. (default: 1e-3)
|
||||
betas (Tuple[float, float], optional): coefficients used for computing
|
||||
running averages of gradient and its norm. (default: (0.9, 0.999))
|
||||
eps (float, optional): term added to the denominator to improve
|
||||
numerical stability. (default: 1e-8)
|
||||
weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
|
||||
grad_averaging (bool, optional): whether apply (1-beta2) to grad when
|
||||
calculating running averages of gradient. (default: True)
|
||||
max_grad_norm (float, optional): value used to clip global grad norm
|
||||
(default: 1.0)
|
||||
trust_clip (bool): enable LAMBC trust ratio clipping (default: False)
|
||||
always_adapt (boolean, optional): Apply adaptive learning rate to 0.0
|
||||
weight decay parameter (default: False)
|
||||
""" # noqa: E501
|
||||
|
||||
def __init__(self,
|
||||
params,
|
||||
lr=1e-3,
|
||||
bias_correction=True,
|
||||
betas=(0.9, 0.999),
|
||||
eps=1e-6,
|
||||
weight_decay=0.01,
|
||||
grad_averaging=True,
|
||||
max_grad_norm=1.0,
|
||||
trust_clip=False,
|
||||
always_adapt=False):
|
||||
defaults = dict(
|
||||
lr=lr,
|
||||
bias_correction=bias_correction,
|
||||
betas=betas,
|
||||
eps=eps,
|
||||
weight_decay=weight_decay,
|
||||
grad_averaging=grad_averaging,
|
||||
max_grad_norm=max_grad_norm,
|
||||
trust_clip=trust_clip,
|
||||
always_adapt=always_adapt)
|
||||
super().__init__(params, defaults)
|
||||
|
||||
@torch.no_grad()
|
||||
def step(self, closure=None):
|
||||
"""Performs a single optimization step.
|
||||
Arguments:
|
||||
closure (callable, optional): A closure that reevaluates the model
|
||||
and returns the loss.
|
||||
"""
|
||||
loss = None
|
||||
if closure is not None:
|
||||
with torch.enable_grad():
|
||||
loss = closure()
|
||||
|
||||
device = self.param_groups[0]['params'][0].device
|
||||
one_tensor = torch.tensor(
|
||||
1.0, device=device
|
||||
) # because torch.where doesn't handle scalars correctly
|
||||
global_grad_norm = torch.zeros(1, device=device)
|
||||
for group in self.param_groups:
|
||||
for p in group['params']:
|
||||
if p.grad is None:
|
||||
continue
|
||||
grad = p.grad
|
||||
if grad.is_sparse:
|
||||
raise RuntimeError(
|
||||
'Lamb does not support sparse gradients, consider '
|
||||
'SparseAdam instead.')
|
||||
global_grad_norm.add_(grad.pow(2).sum())
|
||||
|
||||
global_grad_norm = torch.sqrt(global_grad_norm)
|
||||
# FIXME it'd be nice to remove explicit tensor conversion of scalars
|
||||
# when torch.where promotes
|
||||
# scalar types properly https://github.com/pytorch/pytorch/issues/9190
|
||||
max_grad_norm = torch.tensor(
|
||||
self.defaults['max_grad_norm'], device=device)
|
||||
clip_global_grad_norm = torch.where(global_grad_norm > max_grad_norm,
|
||||
global_grad_norm / max_grad_norm,
|
||||
one_tensor)
|
||||
|
||||
for group in self.param_groups:
|
||||
bias_correction = 1 if group['bias_correction'] else 0
|
||||
beta1, beta2 = group['betas']
|
||||
grad_averaging = 1 if group['grad_averaging'] else 0
|
||||
beta3 = 1 - beta1 if grad_averaging else 1.0
|
||||
|
||||
# assume same step across group now to simplify things
|
||||
# per-parameter step can easily be supported by making it a tensor, or
|
||||
# pass list into kernel
|
||||
if 'step' in group:
|
||||
group['step'] += 1
|
||||
else:
|
||||
group['step'] = 1
|
||||
|
||||
if bias_correction:
|
||||
bias_correction1 = 1 - beta1**group['step']
|
||||
bias_correction2 = 1 - beta2**group['step']
|
||||
else:
|
||||
bias_correction1, bias_correction2 = 1.0, 1.0
|
||||
|
||||
for p in group['params']:
|
||||
if p.grad is None:
|
||||
continue
|
||||
grad = p.grad.div_(clip_global_grad_norm)
|
||||
state = self.state[p]
|
||||
|
||||
# State initialization
|
||||
if len(state) == 0:
|
||||
# Exponential moving average of gradient values
|
||||
state['exp_avg'] = torch.zeros_like(p)
|
||||
# Exponential moving average of squared gradient values
|
||||
state['exp_avg_sq'] = torch.zeros_like(p)
|
||||
|
||||
exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
|
||||
|
||||
# Decay the first and second moment running average coefficient
|
||||
exp_avg.mul_(beta1).add_(grad, alpha=beta3) # m_t
|
||||
exp_avg_sq.mul_(beta2).addcmul_(
|
||||
grad, grad, value=1 - beta2) # v_t
|
||||
|
||||
denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(
|
||||
group['eps'])
|
||||
update = (exp_avg / bias_correction1).div_(denom)
|
||||
|
||||
weight_decay = group['weight_decay']
|
||||
if weight_decay != 0:
|
||||
update.add_(p, alpha=weight_decay)
|
||||
|
||||
if weight_decay != 0 or group['always_adapt']:
|
||||
# Layer-wise LR adaptation. By default, skip adaptation on
|
||||
# parameters that are
|
||||
# excluded from weight decay, unless always_adapt == True,
|
||||
# then always enabled.
|
||||
w_norm = p.norm(2.0)
|
||||
g_norm = update.norm(2.0)
|
||||
# FIXME nested where required since logical and/or not
|
||||
# working in PT XLA
|
||||
trust_ratio = torch.where(
|
||||
w_norm > 0,
|
||||
torch.where(g_norm > 0, w_norm / g_norm, one_tensor),
|
||||
one_tensor,
|
||||
)
|
||||
if group['trust_clip']:
|
||||
# LAMBC trust clipping, upper bound fixed at one
|
||||
trust_ratio = torch.minimum(trust_ratio, one_tensor)
|
||||
update.mul_(trust_ratio)
|
||||
|
||||
p.add_(update, alpha=-group['lr'])
|
||||
|
||||
return loss
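Since the class above is registered via `@OPTIMIZERS.register_module()`, it should be selectable through a standard mmcv-style optimizer config. The fragment below is only a hedged sketch of such a config; the learning rate and weight decay values are placeholders, not recommendations from this commit.

```python
# Hypothetical EasyCV/mmcv-style config fragment selecting the new optimizer.
optimizer = dict(
    type='Lamb',          # resolved through the OPTIMIZERS registry
    lr=1e-3,              # placeholder value
    weight_decay=0.01,    # placeholder value
    max_grad_norm=1.0)    # global gradient-norm clipping built into Lamb.step()
```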
|
@ -3,6 +3,8 @@ import torch
|
||||
from torch.optim import * # noqa: F401,F403
|
||||
from torch.optim.optimizer import Optimizer, required
|
||||
|
||||
from easycv.framework.errors import ValueError
|
||||
|
||||
|
||||
class LARS(Optimizer):
|
||||
r"""Implements layer-wise adaptive rate scaling for SGD.
|
||||
|
@ -1,5 +1,3 @@
|
||||
# Reference from https://github.com/ViTAE-Transformer/ViTDet/blob/main/mmcv_custom/layer_decay_optimizer_constructor.py
|
||||
|
||||
import json
|
||||
|
||||
from mmcv.runner import DefaultOptimizerConstructor, get_dist_info
|
||||
@ -7,23 +5,32 @@ from mmcv.runner import DefaultOptimizerConstructor, get_dist_info
|
||||
from .builder import OPTIMIZER_BUILDERS
|
||||
|
||||
|
||||
def get_num_layer_for_vit(var_name, num_max_layer, layer_sep=None):
|
||||
if var_name in ('backbone.cls_token', 'backbone.mask_token',
|
||||
'backbone.pos_embed'):
|
||||
return 0
|
||||
elif var_name.startswith('backbone.patch_embed'):
|
||||
return 0
|
||||
elif var_name.startswith('backbone.blocks'):
|
||||
layer_id = int(var_name.split('.')[2])
|
||||
return layer_id + 1
|
||||
else:
|
||||
return num_max_layer - 1
|
||||
def get_vit_lr_decay_rate(name, lr_decay_rate=1.0, num_layers=12):
|
||||
"""
|
||||
Calculate lr decay rate for different ViT blocks.
|
||||
Reference from https://github.com/facebookresearch/detectron2/blob/main/detectron2/modeling/backbone/vit.py
|
||||
Args:
|
||||
name (string): parameter name.
|
||||
lr_decay_rate (float): base lr decay rate.
|
||||
num_layers (int): number of ViT blocks.
|
||||
Returns:
|
||||
lr decay rate for the given parameter.
|
||||
"""
|
||||
layer_id = num_layers + 1
|
||||
if '.pos_embed' in name or '.patch_embed' in name:
|
||||
layer_id = 0
|
||||
elif '.blocks.' in name and '.residual.' not in name:
|
||||
layer_id = int(name[name.find('.blocks.'):].split('.')[2]) + 1
|
||||
|
||||
scale = lr_decay_rate**(num_layers + 1 - layer_id)
|
||||
|
||||
return layer_id, scale
|
||||
|
||||
|
||||
@OPTIMIZER_BUILDERS.register_module()
|
||||
class LayerDecayOptimizerConstructor(DefaultOptimizerConstructor):
|
||||
|
||||
def add_params(self, params, module, prefix='', is_dcn_module=None):
|
||||
def add_params(self, params, module):
|
||||
"""Add all parameters of module to the params list.
|
||||
The parameters of the given module will be added to the list of param
|
||||
groups, with specific rules defined by paramwise_cfg.
|
||||
@ -31,54 +38,41 @@ class LayerDecayOptimizerConstructor(DefaultOptimizerConstructor):
|
||||
params (list[dict]): A list of param groups, it will be modified
|
||||
in place.
|
||||
module (nn.Module): The module to be added.
|
||||
prefix (str): The prefix of the module
|
||||
is_dcn_module (int|float|None): If the current module is a
|
||||
submodule of DCN, `is_dcn_module` will be passed to
|
||||
control conv_offset layer's learning rate. Defaults to None.
|
||||
|
||||
Reference from https://github.com/ViTAE-Transformer/ViTDet/blob/main/mmcv_custom/layer_decay_optimizer_constructor.py
|
||||
Note: Currently, this optimizer constructor is built for ViTDet.
|
||||
"""
|
||||
# get param-wise options
|
||||
|
||||
parameter_groups = {}
|
||||
print(self.paramwise_cfg)
|
||||
num_layers = self.paramwise_cfg.get('num_layers') + 2
|
||||
layer_sep = self.paramwise_cfg.get('layer_sep', None)
|
||||
layer_decay_rate = self.paramwise_cfg.get('layer_decay_rate')
|
||||
lr_decay_rate = self.paramwise_cfg.get('layer_decay_rate')
|
||||
num_layers = self.paramwise_cfg.get('num_layers')
|
||||
print('Build LayerDecayOptimizerConstructor %f - %d' %
|
||||
(layer_decay_rate, num_layers))
|
||||
(lr_decay_rate, num_layers))
|
||||
lr = self.base_lr
|
||||
weight_decay = self.base_wd
|
||||
|
||||
custom_keys = self.paramwise_cfg.get('custom_keys', {})
|
||||
# first sort with alphabet order and then sort with reversed len of str
|
||||
sorted_keys = sorted(custom_keys.keys())
|
||||
|
||||
for name, param in module.named_parameters():
|
||||
|
||||
if not param.requires_grad:
|
||||
continue # frozen weights
|
||||
|
||||
if len(param.shape) == 1 or name.endswith('.bias') or (
|
||||
'pos_embed' in name) or ('cls_token'
|
||||
in name) or ('rel_pos_' in name):
|
||||
if 'backbone' in name and ('.norm' in name or '.pos_embed' in name
|
||||
or '.gn.' in name or '.ln.' in name):
|
||||
group_name = 'no_decay'
|
||||
this_weight_decay = 0.
|
||||
else:
|
||||
group_name = 'decay'
|
||||
this_weight_decay = weight_decay
|
||||
|
||||
layer_id = get_num_layer_for_vit(name, num_layers, layer_sep)
|
||||
if name.startswith('backbone'):
|
||||
layer_id, scale = get_vit_lr_decay_rate(
|
||||
name, lr_decay_rate=lr_decay_rate, num_layers=num_layers)
|
||||
else:
|
||||
layer_id, scale = -1, 1
|
||||
group_name = 'layer_%d_%s' % (layer_id, group_name)
|
||||
|
||||
# if the parameter match one of the custom keys, ignore other rules
|
||||
this_lr_multi = 1.
|
||||
for key in sorted_keys:
|
||||
if key in f'{name}':
|
||||
lr_mult = custom_keys[key].get('lr_mult', 1.)
|
||||
this_lr_multi = lr_mult
|
||||
group_name = '%s_%s' % (group_name, key)
|
||||
break
|
||||
|
||||
if group_name not in parameter_groups:
|
||||
scale = layer_decay_rate**(num_layers - layer_id - 1)
|
||||
|
||||
parameter_groups[group_name] = {
|
||||
'weight_decay': this_weight_decay,
|
||||
@ -86,7 +80,7 @@ class LayerDecayOptimizerConstructor(DefaultOptimizerConstructor):
|
||||
'param_names': [],
|
||||
'lr_scale': scale,
|
||||
'group_name': group_name,
|
||||
'lr': scale * self.base_lr * this_lr_multi,
|
||||
'lr': scale * lr,
|
||||
}
|
||||
|
||||
parameter_groups[group_name]['params'].append(param)
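To make the new decay rule concrete, here is a small sketch of how `get_vit_lr_decay_rate` scales the learning rate per parameter for a hypothetical 12-block ViT backbone. The import path is an assumption based on the file locations touched in this diff, and the decay rate 0.7 is just an illustrative value.

```python
from easycv.core.optimizer.layer_decay_optimizer_constructor import get_vit_lr_decay_rate

# pos_embed/patch_embed -> layer 0, block i -> layer i + 1, everything else -> last layer
for name in ('backbone.pos_embed',
             'backbone.blocks.0.attn.qkv.weight',
             'backbone.blocks.11.mlp.fc2.weight',
             'backbone.norm.weight'):
    layer_id, scale = get_vit_lr_decay_rate(name, lr_decay_rate=0.7, num_layers=12)
    print(f'{name}: layer_id={layer_id}, lr_scale={scale:.4f}')
# scale = lr_decay_rate ** (num_layers + 1 - layer_id), so deeper blocks keep a larger
# share of the base lr while the patch/positional embeddings get the smallest one.
```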
|
||||
|
@ -4,6 +4,8 @@ import math
|
||||
import torch
|
||||
from torch.optim.optimizer import Optimizer
|
||||
|
||||
from easycv.framework.errors import ValueError
|
||||
|
||||
|
||||
def centralized_gradient(x, use_gc=True, gc_conv_only=False):
|
||||
'''credit - https://github.com/Yonghongwei/Gradient-Centralization '''
|
||||
|
@ -22,6 +22,7 @@ import torch
|
||||
from easycv.core.sailfish.util import (BiasUniformInitializer,
|
||||
KaimingUniformInitializer,
|
||||
ModelParallel, RenormUniformInitializer)
|
||||
from easycv.framework.errors import ValueError
|
||||
|
||||
|
||||
class Linear(torch.nn.Module):
|
||||
|
@ -25,6 +25,7 @@ from easycv.core.sailfish.function import (all_cat, all_log_softmax,
|
||||
shard_correct_predictions,
|
||||
shard_target_and_mask,
|
||||
shard_topk_correct_predictions)
|
||||
from easycv.framework.errors import NotImplementedError, ValueError
|
||||
|
||||
|
||||
class DistributedParallel:
|
||||
|
@ -10,6 +10,8 @@ import numpy as np
|
||||
from mmcv.utils.misc import deprecated_api_warning
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
|
||||
from easycv.framework.errors import FileNotFoundError
|
||||
|
||||
|
||||
def get_font_path():
|
||||
root_path = opd(opd(opd(os.path.realpath(__file__))))
|
||||
@ -22,8 +24,8 @@ def get_font_path():
|
||||
elif os.path.exists(find_path_source):
|
||||
return find_path_source
|
||||
else:
|
||||
raise ValueError('Not find font file both in %s and %s' %
|
||||
(find_path_whl, find_path_source))
|
||||
raise FileNotFoundError('Not find font file both in %s and %s' %
|
||||
(find_path_whl, find_path_source))
|
||||
|
||||
|
||||
_FONT_PATH = get_font_path()
|
||||
|
@ -7,6 +7,7 @@ from PIL import Image, ImageFile
|
||||
|
||||
from easycv.datasets.registry import DATASOURCES
|
||||
from easycv.file import io
|
||||
from easycv.framework.errors import TypeError
|
||||
from easycv.utils.dist_utils import dist_zero_exec
|
||||
from .utils import split_listfile_byrank
|
||||
|
||||
@ -54,8 +55,8 @@ class ClsSourceImageList(object):
|
||||
'list_file should be str or list(str)'
|
||||
root = [root] if isinstance(root, str) else root
|
||||
if not isinstance(root, list):
|
||||
raise ValueError('root must be str or list(str), but get %s' %
|
||||
type(root))
|
||||
raise TypeError('root must be str or list(str), but get %s' %
|
||||
type(root))
|
||||
|
||||
if len(root) < len(list_file):
|
||||
logging.warning(
|
||||
|
@ -3,6 +3,7 @@ from PIL import Image
|
||||
|
||||
from easycv.datasets.registry import DATASETS
|
||||
from easycv.datasets.shared.base import BaseDataset
|
||||
from easycv.framework.errors import NotImplementedError
|
||||
|
||||
|
||||
@DATASETS.register_module
|
||||
|
@ -8,10 +8,11 @@ from typing import Sequence
|
||||
|
||||
import mmcv
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
from PIL import Image, ImageFilter
|
||||
|
||||
from easycv.datasets.registry import PIPELINES
|
||||
from easycv.datasets.shared.pipelines import Compose
|
||||
from easycv.framework.errors import TypeError
|
||||
|
||||
# Default hyperparameters for all Ops
|
||||
_HPARAMS_DEFAULT = dict(pad_val=128)
|
||||
@ -1043,3 +1044,37 @@ class Cutout(object):
|
||||
repr_str += f'pad_val={self.pad_val}, '
|
||||
repr_str += f'prob={self.prob})'
|
||||
return repr_str
|
||||
|
||||
|
||||
@PIPELINES.register_module()
|
||||
class PILGaussianBlur(object):
|
||||
|
||||
def __init__(self, prob=0.1, radius_min=0.1, radius_max=2.):
|
||||
assert 0 <= prob <= 1.0, 'The prob should be in range [0,1], ' \
|
||||
f'got {prob} instead.'
|
||||
assert isinstance(radius_min, (int, float)), 'The radius_min type must '\
|
||||
f'be int or float, but got {type(radius_min)} instead.'
|
||||
assert isinstance(radius_max, (int, float)), 'The radius_max type must '\
|
||||
f'be int or float, but got {type(radius_max)} instead.'
|
||||
|
||||
self.prob = prob
|
||||
self.radius_min = radius_min
|
||||
self.radius_max = radius_max
|
||||
|
||||
def __call__(self, results):
|
||||
if np.random.rand() > self.prob:
|
||||
return results
|
||||
|
||||
for key in results.get('img_fields', ['img']):
|
||||
img = results[key].filter(
|
||||
ImageFilter.GaussianBlur(
|
||||
radius=random.uniform(self.radius_min, self.radius_max)))
|
||||
results[key] = img
|
||||
return results
|
||||
|
||||
def __repr__(self):
|
||||
repr_str = self.__class__.__name__
|
||||
repr_str += f'(prob={self.prob}, '
|
||||
repr_str += f'radius_min={self.radius_min}, '
|
||||
repr_str += f'radius_max={self.radius_max})'
|
||||
return repr_str
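The new transform is registered in `PIPELINES`, so it should be usable as one more entry in a classification-style data pipeline. The snippet below is a hedged sketch of such a pipeline; the surrounding transforms and parameter values are placeholders, not part of this commit.

```python
# Hypothetical pipeline fragment; only the PILGaussianBlur entry comes from this change.
train_pipeline = [
    dict(type='RandomResizedCrop', size=224),   # placeholder transform
    dict(type='PILGaussianBlur', prob=0.5, radius_min=0.1, radius_max=2.0),
    dict(type='ToTensor'),
]
```

Because the transform calls `Image.filter` on `results[key]`, it has to run while the image is still a PIL object, i.e. before any to-tensor conversion in the pipeline.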
|
||||
|
@ -10,6 +10,7 @@ from mmcv.runner.dist_utils import get_dist_info
|
||||
from tqdm import tqdm
|
||||
|
||||
from easycv.file.image import load_image
|
||||
from easycv.framework.errors import NotImplementedError, ValueError
|
||||
|
||||
|
||||
def _load_image(img_path):
|
||||
|
@ -4,6 +4,7 @@ from xtcocotools.coco import COCO
|
||||
|
||||
from easycv.datasets.registry import DATASOURCES, PIPELINES
|
||||
from easycv.datasets.shared.pipelines import Compose
|
||||
from easycv.framework.errors import TypeError
|
||||
from easycv.utils.registry import build_from_cfg
|
||||
|
||||
|
||||
|
@ -8,6 +8,7 @@ from xtcocotools.coco import COCO
|
||||
from easycv.datasets.detection.data_sources import DetSourceCoco
|
||||
from easycv.datasets.registry import DATASOURCES, PIPELINES
|
||||
from easycv.datasets.shared.pipelines import Compose
|
||||
from easycv.framework.errors import RuntimeError, TypeError
|
||||
from easycv.utils.registry import build_from_cfg
|
||||
|
||||
try:
|
||||
|
@ -8,6 +8,7 @@ import numpy as np
|
||||
from easycv.datasets.detection.data_sources.base import DetSourceBase
|
||||
from easycv.datasets.registry import DATASOURCES
|
||||
from easycv.file import io
|
||||
from easycv.framework.errors import NotImplementedError, ValueError
|
||||
|
||||
|
||||
def get_prior_task_id(keys):
|
||||
|
@ -9,6 +9,7 @@ import numpy as np
|
||||
import torch
|
||||
|
||||
from easycv.datasets.registry import DATASETS, PIPELINES
|
||||
from easycv.framework.errors import TypeError
|
||||
from easycv.utils.bbox_util import xyxy2xywh as xyxy2cxcywh
|
||||
from easycv.utils.registry import build_from_cfg
|
||||
from .raw import DetDataset
|
||||
|
@ -13,6 +13,7 @@ from torchvision.transforms import functional as F
|
||||
|
||||
from easycv.datasets.registry import PIPELINES
|
||||
from easycv.datasets.shared.pipelines.transforms import Compose
|
||||
from easycv.framework.errors import KeyError, NotImplementedError, TypeError
|
||||
|
||||
try:
|
||||
from panopticapi.utils import rgb2id
|
||||
@ -1122,8 +1123,8 @@ class MMRandomFlip:
|
||||
elif flip_ratio is None:
|
||||
pass
|
||||
else:
|
||||
raise ValueError('flip_ratios must be None, float, '
|
||||
'or list of float')
|
||||
raise TypeError('flip_ratios must be None, float, '
|
||||
'or list of float')
|
||||
self.flip_ratio = flip_ratio
|
||||
|
||||
valid_directions = ['horizontal', 'vertical', 'diagonal']
|
||||
@ -1133,7 +1134,7 @@ class MMRandomFlip:
|
||||
assert mmcv.is_list_of(direction, str)
|
||||
assert set(direction).issubset(set(valid_directions))
|
||||
else:
|
||||
raise ValueError('direction must be either str or list of str')
|
||||
raise TypeError('direction must be either str or list of str')
|
||||
self.direction = direction
|
||||
|
||||
if isinstance(flip_ratio, list):
|
||||
@ -1168,7 +1169,7 @@ class MMRandomFlip:
|
||||
flipped[..., 2::4] = w - bboxes[..., 0::4]
|
||||
flipped[..., 3::4] = h - bboxes[..., 1::4]
|
||||
else:
|
||||
raise ValueError(f"Invalid flipping direction '{direction}'")
|
||||
raise KeyError(f"Invalid flipping direction '{direction}'")
|
||||
return flipped
|
||||
|
||||
def __call__(self, results):
|
||||
@ -1274,7 +1275,7 @@ class MMRandomCrop:
|
||||
if crop_type not in [
|
||||
'relative_range', 'relative', 'absolute', 'absolute_range'
|
||||
]:
|
||||
raise ValueError(f'Invalid crop_type {crop_type}.')
|
||||
raise KeyError(f'Invalid crop_type {crop_type}.')
|
||||
if crop_type in ['absolute', 'absolute_range']:
|
||||
assert crop_size[0] > 0 and crop_size[1] > 0
|
||||
assert isinstance(crop_size[0], int) and isinstance(
|
||||
|
@ -9,6 +9,7 @@ from easycv.datasets.detection.data_sources import DetSourceCoco
|
||||
from easycv.datasets.registry import DATASETS
|
||||
from easycv.datasets.shared.base import BaseDataset
|
||||
from easycv.file.image import load_image
|
||||
from easycv.framework.errors import TimeoutError
|
||||
|
||||
|
||||
@DATASETS.register_module
|
||||
@ -38,7 +39,7 @@ class DetDataset(BaseDataset):
|
||||
count = 0
|
||||
while True:
|
||||
if count > 10:
|
||||
raise RuntimeError('Loops timeout')
|
||||
raise TimeoutError('Loops timeout')
|
||||
data_dict = self.data_source[idx]
|
||||
data_dict = self.pipeline(data_dict)
|
||||
if data_dict is None:
|
||||
|
@ -11,10 +11,11 @@ from mmcv.runner import get_dist_info
|
||||
from torch.utils.data import DataLoader, RandomSampler
|
||||
|
||||
from easycv.datasets.shared.odps_reader import set_dataloader_workid
|
||||
from easycv.framework.errors import NotImplementedError
|
||||
from easycv.utils.dist_utils import sync_random_seed
|
||||
from easycv.utils.torchacc_util import is_torchacc_enabled
|
||||
from .collate import CollateWrapper
|
||||
from .sampler import DistributedMPSampler, DistributedSampler
|
||||
from .sampler import DistributedMPSampler, DistributedSampler, RASampler
|
||||
|
||||
if platform.system() != 'Windows':
|
||||
# https://github.com/pytorch/pytorch/issues/973
|
||||
@ -35,6 +36,7 @@ def build_dataloader(dataset,
|
||||
odps_config=None,
|
||||
persistent_workers=False,
|
||||
collate_hooks=None,
|
||||
use_repeated_augment_sampler=False,
|
||||
**kwargs):
|
||||
"""Build PyTorch DataLoader.
|
||||
In distributed training, each GPU/process has a dataloader.
|
||||
@ -56,6 +58,8 @@ def build_dataloader(dataset,
|
||||
data in worker process can be reused.
|
||||
persistent_workers (bool) : After pytorch 1.7, persistent_workers=True can be used to
|
||||
avoid rebuilding the dataloader workers before each epoch, which speeds up the start of each epoch.
|
||||
use_repeated_augment_sampler (bool) : If set to True, RASampler will be used.
|
||||
Default: False.
|
||||
kwargs: any keyword argument to be used to initialize DataLoader
|
||||
Returns:
|
||||
DataLoader: A PyTorch dataloader.
|
||||
@ -68,7 +72,9 @@ def build_dataloader(dataset,
|
||||
'split_huge_listfile_byrank',
|
||||
False)
|
||||
|
||||
if hasattr(dataset, 'm_per_class') and dataset.m_per_class > 1:
|
||||
if use_repeated_augment_sampler:
|
||||
sampler = RASampler(dataset, world_size, rank, shuffle=shuffle)
|
||||
elif hasattr(dataset, 'm_per_class') and dataset.m_per_class > 1:
|
||||
sampler = DistributedMPSampler(
|
||||
dataset,
|
||||
world_size,
|
||||
@ -88,7 +94,10 @@ def build_dataloader(dataset,
|
||||
else:
|
||||
if replace:
|
||||
raise NotImplementedError
|
||||
if hasattr(dataset, 'm_per_class') and dataset.m_per_class > 1:
|
||||
|
||||
if use_repeated_augment_sampler:
|
||||
sampler = RASampler(dataset, 1, 0, shuffle=shuffle)
|
||||
elif hasattr(dataset, 'm_per_class') and dataset.m_per_class > 1:
|
||||
sampler = DistributedMPSampler(
|
||||
dataset, 1, 0, shuffle=shuffle, replace=replace)
|
||||
else:
|
||||
|
@ -6,10 +6,13 @@ import random
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.distributed as dist
|
||||
from mmcv.runner import get_dist_info
|
||||
from torch.utils.data import DistributedSampler as _DistributedSampler
|
||||
from torch.utils.data import Sampler
|
||||
|
||||
from easycv.framework.errors import ValueError
|
||||
|
||||
|
||||
class DistributedMPSampler(_DistributedSampler):
|
||||
|
||||
@ -83,7 +86,9 @@ class DistributedMPSampler(_DistributedSampler):
|
||||
self.label_list = []
|
||||
|
||||
if not self.dataset.data_source.has_labels:
|
||||
raise 'MPSampler need initial with classification datasets which has label!'
|
||||
raise ValueError(
|
||||
'MPSampler must be initialized with a classification dataset that has labels!'
|
||||
)
|
||||
|
||||
for idx, label in enumerate(self.dataset.data_source.labels):
|
||||
if label in self.label_dict.keys():
|
||||
@ -469,3 +474,73 @@ class DistributedGivenIterationSampler(Sampler):
|
||||
|
||||
def set_epoch(self, epoch):
|
||||
pass
|
||||
|
||||
|
||||
class RASampler(torch.utils.data.Sampler):
|
||||
"""Sampler that restricts data loading to a subset of the dataset for distributed,
|
||||
with repeated augmentation.
|
||||
It ensures that each augmented version of a sample will be visible to a
|
||||
different process (GPU)
|
||||
Heavily based on torch.utils.data.DistributedSampler
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
dataset,
|
||||
num_replicas=None,
|
||||
rank=None,
|
||||
shuffle=True,
|
||||
num_repeats: int = 3):
|
||||
if num_replicas is None:
|
||||
if not dist.is_available():
|
||||
raise RuntimeError(
|
||||
'Requires distributed package to be available')
|
||||
num_replicas = dist.get_world_size()
|
||||
if rank is None:
|
||||
if not dist.is_available():
|
||||
raise RuntimeError(
|
||||
'Requires distributed package to be available')
|
||||
rank = dist.get_rank()
|
||||
if num_repeats < 1:
|
||||
raise ValueError('num_repeats should be greater than 0')
|
||||
self.dataset = dataset
|
||||
self.num_replicas = num_replicas
|
||||
self.rank = rank
|
||||
self.num_repeats = num_repeats
|
||||
self.epoch = 0
|
||||
self.num_samples = int(
|
||||
math.ceil(
|
||||
len(self.dataset) * self.num_repeats / self.num_replicas))
|
||||
self.total_size = self.num_samples * self.num_replicas
|
||||
# self.num_selected_samples = int(math.ceil(len(self.dataset) / self.num_replicas))
|
||||
self.num_selected_samples = int(
|
||||
math.floor(len(self.dataset) // 256 * 256 / self.num_replicas))
|
||||
self.shuffle = shuffle
|
||||
|
||||
def __iter__(self):
|
||||
if self.shuffle:
|
||||
# deterministically shuffle based on epoch
|
||||
g = torch.Generator()
|
||||
g.manual_seed(self.epoch)
|
||||
indices = torch.randperm(len(self.dataset), generator=g)
|
||||
else:
|
||||
indices = torch.arange(start=0, end=len(self.dataset))
|
||||
|
||||
# add extra samples to make it evenly divisible
|
||||
indices = torch.repeat_interleave(
|
||||
indices, repeats=self.num_repeats, dim=0).tolist()
|
||||
padding_size: int = self.total_size - len(indices)
|
||||
if padding_size > 0:
|
||||
indices += indices[:padding_size]
|
||||
assert len(indices) == self.total_size
|
||||
|
||||
# subsample
|
||||
indices = indices[self.rank:self.total_size:self.num_replicas]
|
||||
assert len(indices) == self.num_samples
|
||||
|
||||
return iter(indices[:self.num_selected_samples])
|
||||
|
||||
def __len__(self):
|
||||
return self.num_selected_samples
|
||||
|
||||
def set_epoch(self, epoch):
|
||||
self.epoch = epoch
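With `RASampler` exported from `.sampler`, repeated-augmentation sampling can be switched on through the new `use_repeated_augment_sampler` flag of `build_dataloader`. A hedged sketch follows; `dataset` is assumed to be an already-built EasyCV dataset, the batch size and worker counts are placeholders, and the keyword names other than `use_repeated_augment_sampler` follow the usual EasyCV/mmcv conventions.

```python
from easycv.datasets import build_dataloader

# `dataset` is any already-built EasyCV dataset instance.
loader = build_dataloader(
    dataset,
    imgs_per_gpu=64,                     # placeholder batch size
    workers_per_gpu=4,                   # placeholder worker count
    dist=True,
    shuffle=True,
    use_repeated_augment_sampler=True)   # selects RASampler instead of the default sampler
```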
|
||||
|
@ -7,6 +7,7 @@ import json_tricks as json
|
||||
import numpy as np
|
||||
|
||||
from easycv.datasets.registry import DATASOURCES
|
||||
from easycv.framework.errors import ValueError
|
||||
from .top_down import PoseTopDownSource
|
||||
|
||||
COCO_DATASET_INFO = dict(
|
||||
|
@ -12,6 +12,7 @@ from mmcv.utils.path import is_filepath
|
||||
from xtcocotools.coco import COCO
|
||||
|
||||
from easycv.datasets.registry import DATASOURCES
|
||||
from easycv.framework.errors import ValueError
|
||||
|
||||
|
||||
class DatasetInfo:
|
||||
|
@ -6,6 +6,7 @@ from easycv.core.evaluation.keypoint_eval import KeyPointEvaluator
|
||||
from easycv.datasets.pose.data_sources.coco import PoseTopDownSource
|
||||
from easycv.datasets.registry import DATASETS
|
||||
from easycv.datasets.shared.base import BaseDataset
|
||||
from easycv.framework.errors import ValueError
|
||||
|
||||
|
||||
@DATASETS.register_module()
|
||||
|
@ -9,6 +9,7 @@ from easycv.core.post_processing import (affine_transform, fliplr_joints,
|
||||
get_affine_transform, get_warp_matrix,
|
||||
warp_affine_joints)
|
||||
from easycv.datasets.registry import PIPELINES
|
||||
from easycv.framework.errors import ValueError
|
||||
|
||||
|
||||
@PIPELINES.register_module()
|
||||
|
@ -3,6 +3,7 @@ from easycv.core.evaluation.coco_evaluation import CoCoPoseTopDownEvaluator
|
||||
from easycv.datasets.pose.data_sources.coco import PoseTopDownSource
|
||||
from easycv.datasets.registry import DATASETS
|
||||
from easycv.datasets.shared.base import BaseDataset
|
||||
from easycv.framework.errors import ValueError
|
||||
|
||||
|
||||
@DATASETS.register_module()
|
||||
|
@ -12,6 +12,7 @@ from tqdm import tqdm
|
||||
|
||||
from easycv.datasets.registry import DATASOURCES
|
||||
from easycv.file.image import load_image as _load_img
|
||||
from easycv.framework.errors import NotImplementedError, ValueError
|
||||
|
||||
|
||||
def load_image(img_path):
|
||||
@ -26,7 +27,7 @@ def load_image(img_path):
|
||||
|
||||
|
||||
def load_seg_map(seg_path, reduce_zero_label):
|
||||
gt_semantic_seg = _load_img(seg_path, mode='RGB')
|
||||
gt_semantic_seg = _load_img(seg_path, mode='P')
|
||||
# reduce zero_label
|
||||
if reduce_zero_label:
|
||||
# avoid using underflow conversion
|
||||
|
@ -7,6 +7,7 @@ from PIL import Image, ImageFile
|
||||
|
||||
from easycv.datasets.registry import DATASOURCES
|
||||
from easycv.file import io
|
||||
from easycv.framework.errors import ValueError
|
||||
|
||||
|
||||
@DATASOURCES.register_module
|
||||
|
@ -7,6 +7,7 @@ from easycv.datasets.builder import build_datasource
|
||||
from easycv.datasets.registry import DATASETS, PIPELINES
|
||||
from easycv.datasets.shared.base import BaseDataset
|
||||
from easycv.datasets.shared.pipelines.transforms import Compose
|
||||
from easycv.framework.errors import NotImplementedError
|
||||
from easycv.utils.registry import build_from_cfg
|
||||
|
||||
|
||||
|
@ -7,6 +7,7 @@ import torch
|
||||
from mmcv.parallel import DataContainer as DC
|
||||
|
||||
from easycv.datasets.registry import PIPELINES
|
||||
from easycv.framework.errors import TypeError
|
||||
|
||||
|
||||
def to_tensor(data):
|
||||
|
@ -6,6 +6,7 @@ import numpy as np
|
||||
|
||||
from easycv.datasets.registry import PIPELINES
|
||||
from easycv.file.image import load_image
|
||||
from easycv.framework.errors import TypeError
|
||||
from easycv.utils.registry import build_from_cfg
|
||||
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
from easycv.datasets.registry import DATASETS
|
||||
from easycv.framework.errors import NotImplementedError
|
||||
from .base import BaseDataset
|
||||
|
||||
|
||||
|
@ -9,6 +9,8 @@ from datetime import datetime
|
||||
from functools import lru_cache
|
||||
from typing import List, Union
|
||||
|
||||
from easycv.framework.errors import NotImplementedError
|
||||
|
||||
|
||||
class IOBase:
|
||||
|
||||
|
@ -11,6 +11,8 @@ from typing import List, Union
|
||||
from tqdm import tqdm
|
||||
from tqdm.utils import CallbackIOWrapper
|
||||
|
||||
from easycv.framework.errors import (FileNotFoundError, IOError, RuntimeError,
|
||||
ValueError)
|
||||
from .base import IOLocal
|
||||
from .utils import (OSS_PREFIX, create_namedtuple, get_oss_config, is_oss_path,
|
||||
mute_stderr, oss_progress)
|
||||
@ -198,7 +200,7 @@ class IO(IOLocal):
|
||||
time.sleep(3)
|
||||
|
||||
if data is None:
|
||||
raise ValueError('Read file error: %s!' % full_path)
|
||||
raise IOError('Read file error: %s!' % full_path)
|
||||
|
||||
if mode == 'rb':
|
||||
return NullContextWrapper(BytesIO(data))
|
||||
@ -519,6 +521,11 @@ class IO(IOLocal):
|
||||
]
|
||||
if path in files:
|
||||
files.remove(path)
|
||||
if recursive:
|
||||
files = [
|
||||
i for i in files
|
||||
if not self.isdir(f'{OSS_PREFIX}{bucket.bucket_name}/{i}')
|
||||
]
|
||||
|
||||
if not files and not self._obj_exists(bucket, path):
|
||||
raise FileNotFoundError(
|
||||
|
@ -1,4 +1,5 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
import io
|
||||
import logging
|
||||
import time
|
||||
|
||||
@ -6,9 +7,10 @@ import cv2
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from easycv.file import io
|
||||
from easycv import file
|
||||
from easycv.framework.errors import IOError
|
||||
from easycv.utils.constant import MAX_READ_IMAGE_TRY_TIMES
|
||||
from .utils import is_oss_path
|
||||
from .utils import is_oss_path, is_url_path
|
||||
|
||||
|
||||
def load_image(img_path, mode='BGR', max_try_times=MAX_READ_IMAGE_TRY_TIMES):
|
||||
@ -19,16 +21,31 @@ def load_image(img_path, mode='BGR', max_try_times=MAX_READ_IMAGE_TRY_TIMES):
|
||||
img = None
|
||||
while try_cnt < max_try_times:
|
||||
try:
|
||||
with io.open(img_path, 'rb') as infile:
|
||||
# cv2.imdecode may corrupt when the img is broken
|
||||
image = Image.open(infile) # RGB
|
||||
if is_url_path(img_path):
|
||||
from mmcv.fileio.file_client import HTTPBackend
|
||||
client = HTTPBackend()
|
||||
img_bytes = client.get(img_path)
|
||||
buff = io.BytesIO(img_bytes)
|
||||
image = Image.open(buff)
|
||||
if mode.upper() != 'BGR' and image.mode.upper() != mode.upper(
|
||||
):
|
||||
image = image.convert(mode.upper())
|
||||
img = np.asarray(image, dtype=np.uint8)
|
||||
if mode.upper() == 'BGR':
|
||||
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
|
||||
assert mode.upper() in ['RGB', 'BGR'
|
||||
], 'Only support `RGB` and `BGR` mode!'
|
||||
assert img is not None
|
||||
break
|
||||
else:
|
||||
with file.io.open(img_path, 'rb') as infile:
|
||||
# cv2.imdecode may corrupt when the img is broken
|
||||
image = Image.open(infile)
|
||||
if mode.upper() != 'BGR' and image.mode.upper(
|
||||
) != mode.upper():
|
||||
image = image.convert(mode.upper())
|
||||
img = np.asarray(image, dtype=np.uint8)
|
||||
|
||||
if mode.upper() == 'BGR':
|
||||
if image.mode.upper() != 'RGB':
|
||||
image = image.convert('RGB')
|
||||
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
|
||||
assert img is not None
|
||||
break
|
||||
except Exception as e:
|
||||
logging.error(e)
|
||||
logging.warning('Read file {} fault, try count : {}'.format(
|
||||
@ -43,6 +60,6 @@ def load_image(img_path, mode='BGR', max_try_times=MAX_READ_IMAGE_TRY_TIMES):
|
||||
try_cnt += 1
|
||||
|
||||
if img is None:
|
||||
raise ValueError('Read Image Error: ' + img_path)
|
||||
raise IOError('Read Image Error: ' + img_path)
|
||||
|
||||
return img
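After this change `load_image` accepts plain http(s) URLs in addition to local and OSS paths. A short usage sketch, with placeholder paths and URL:

```python
from easycv.file.image import load_image

img_bgr = load_image('data/demo.jpg')                              # default mode='BGR', returns a uint8 ndarray
img_rgb = load_image('https://example.com/demo.jpg', mode='RGB')   # URL inputs are fetched via mmcv's HTTPBackend
```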
|
||||
|
@ -10,8 +10,10 @@ from io import StringIO
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
from easycv.framework.errors import ValueError
|
||||
|
||||
OSS_PREFIX = 'oss://'
|
||||
URL_PREFIX = 'https://'
|
||||
URL_PREFIX = ('https://', 'http://')
|
||||
|
||||
|
||||
def create_namedtuple(**kwargs):
|
||||
@ -31,6 +33,7 @@ def url_path_exists(url):
|
||||
urllib.request.urlopen(url).code
|
||||
except Exception as err:
|
||||
print(err)
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
|
0
easycv/framework/__init__.py
Normal file
0
easycv/framework/__init__.py
Normal file
128
easycv/framework/errors.py
Normal file
128
easycv/framework/errors.py
Normal file
@ -0,0 +1,128 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
|
||||
# total 64 bit
|
||||
# 63~64 (question category): 01 (user), ...
|
||||
# 60~62 (error severity): 001 (ERROR), 010(WARNING), 011(INFO), 100 (DEBUG), ...
|
||||
# 54~59 (product): 00000011 (PAI)
|
||||
# 49~53 (sub product): 00000 (none)
|
||||
# 41~48 (framework): 00000001 (tensorflow), 00000010 (pytorch)
|
||||
# 1~40 (error code)
|
||||
OK = 0x5818008000000000
|
||||
RUNTIME = 0x4818008000000001
|
||||
UNIMPLEMENTED = 0x4818008000000002
|
||||
INVALID_ARGUMENT = 0x4818008000000003
|
||||
INVALID_VALUE = 0x4818008000000004
|
||||
INVALID_KEY = 0x4818008000000005
|
||||
INVALID_TYPE = 0x4818008000000006
|
||||
MODULE_NOT_FOUND = 0x4818008000000007
|
||||
FILE_NOT_FOUND = 0x4818008000000008
|
||||
IO_FAILED = 0x4818008000000009
|
||||
PERMISSION_DENIED = 0x481800800000000a
|
||||
TIMEOUT = 0x481800800000000b
|
||||
|
||||
|
||||
class BaseError(Exception):
|
||||
"""The base error class for exceptions.
|
||||
"""
|
||||
code = None
|
||||
|
||||
def __init__(self, message='', details=None, op=None):
|
||||
"""Creates a new `OpError` indicating that a particular op failed.
|
||||
|
||||
Args:
|
||||
message: The message string describing the failure.
|
||||
details: The help message that handle the error.
|
||||
op: The `ops.Operation` that failed, if known; otherwise None. During
|
||||
eager execution, this field is always `None`.
|
||||
"""
|
||||
super(BaseError, self).__init__()
|
||||
self._op = op
|
||||
self._message = message
|
||||
self._details = details
|
||||
|
||||
@property
|
||||
def message(self):
|
||||
"""The error message that describes the error."""
|
||||
return self._message
|
||||
|
||||
@property
|
||||
def details(self):
|
||||
"""The help message that handle the error."""
|
||||
return self._details
|
||||
|
||||
@property
|
||||
def op(self):
|
||||
"""The operation that failed, if known.
|
||||
Returns:
|
||||
The `Operation` that failed, or None.
|
||||
"""
|
||||
return self._op
|
||||
|
||||
@property
|
||||
def error_code(self):
|
||||
"""The integer error code that describes the error."""
|
||||
return hex(self.code)
|
||||
|
||||
def __str__(self):
|
||||
print_str = 'ErrorCode: ' + self.error_code
|
||||
if self.op is not None:
|
||||
print_str += '\n' + 'Operation: ' + str(self.op)
|
||||
print_str += '\n' + 'Message: ' + self.message
|
||||
if self.details is not None:
|
||||
print_str += '\n' + 'Details: ' + self.details
|
||||
return print_str
|
||||
|
||||
|
||||
class NotImplementedError(BaseError):
|
||||
"""Raised when an operation has not been implemented."""
|
||||
code = UNIMPLEMENTED
|
||||
|
||||
|
||||
class RuntimeError(BaseError):
|
||||
"""Raised when the system experiences an internal error."""
|
||||
code = RUNTIME
|
||||
|
||||
|
||||
class PermissionDeniedError(BaseError):
|
||||
"""Raised when the caller does not have permission to run an operation."""
|
||||
code = PERMISSION_DENIED
|
||||
|
||||
|
||||
class FileNotFoundError(BaseError):
|
||||
"""Raised when a requested entity was not found."""
|
||||
code = FILE_NOT_FOUND
|
||||
|
||||
|
||||
class ModuleNotFoundError(BaseError):
|
||||
"""Raised when a module could not be located."""
|
||||
code = MODULE_NOT_FOUND
|
||||
|
||||
|
||||
class InvalidArgumentError(BaseError):
|
||||
"""Raised when an operation receives an invalid argument."""
|
||||
code = INVALID_ARGUMENT
|
||||
|
||||
|
||||
class TimeoutError(BaseError):
|
||||
"""Raised when an operation timed out."""
|
||||
code = TIMEOUT
|
||||
|
||||
|
||||
class IOError(BaseError):
|
||||
"""Raised when an operation returns a system-related error, including I/O failures."""
|
||||
code = IO_FAILED
|
||||
|
||||
|
||||
class ValueError(BaseError):
|
||||
"""Raised when an operation receives an invalid value."""
|
||||
code = INVALID_VALUE
|
||||
|
||||
|
||||
class KeyError(BaseError):
|
||||
"""Raised when a mapping (dictionary) key is not found in the set of existing keys."""
|
||||
code = INVALID_KEY
|
||||
|
||||
|
||||
class TypeError(BaseError):
|
||||
"""Raised when an operation or function is applied to an object of inappropriate type."""
|
||||
code = INVALID_TYPE
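The new error hierarchy keeps the familiar exception names but attaches a 64-bit error code to each class. A minimal usage sketch (the message and details strings are made up for the example):

```python
from easycv.framework.errors import ValueError  # intentionally shadows the builtin

try:
    raise ValueError('invalid crop size', details='crop size must be a positive integer')
except ValueError as err:
    print(err.error_code)  # hex string encoding the category/severity/product/framework bits
    print(err)             # multi-line report built by BaseError.__str__
```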
|
@ -13,7 +13,8 @@ from .eval_hook import DistEvalHook, EvalHook
|
||||
from .export_hook import ExportHook
|
||||
from .extractor import Extractor
|
||||
from .logger import PreLoggerHook
|
||||
from .lr_update_hook import StepFixCosineAnnealingLrUpdaterHook
|
||||
from .lr_update_hook import (CosineAnnealingWarmupByEpochLrUpdaterHook,
|
||||
StepFixCosineAnnealingLrUpdaterHook)
|
||||
from .optimizer_hook import OptimizerHook
|
||||
from .oss_sync_hook import OSSSyncHook
|
||||
from .registry import HOOKS
|
||||
@ -33,7 +34,8 @@ __all__ = [
|
||||
'OSSSyncHook', 'HOOKS', 'TIMEHook', 'SWAVHook', 'SyncNormHook',
|
||||
'SyncRandomSizeHook', 'TensorboardLoggerHookV2', 'WandbLoggerHookV2',
|
||||
'YOLOXLrUpdaterHook', 'YOLOXModeSwitchHook', 'MixupCollateHook',
|
||||
'PreLoggerHook', 'StepFixCosineAnnealingLrUpdaterHook', 'ThroughputHook'
|
||||
'PreLoggerHook', 'StepFixCosineAnnealingLrUpdaterHook',
|
||||
'CosineAnnealingWarmupByEpochLrUpdaterHook', 'ThroughputHook'
|
||||
]
|
||||
|
||||
if LooseVersion(torch.__version__) >= LooseVersion('1.6.0'):
|
||||
|
@ -7,6 +7,7 @@ from mmcv.runner import Hook
|
||||
from torch.utils.data import DataLoader
|
||||
|
||||
from easycv.datasets.loader.loader_wrapper import TorchaccLoaderWrapper
|
||||
from easycv.framework.errors import TypeError
|
||||
from easycv.hooks.tensorboard import TensorboardLoggerHookV2
|
||||
from easycv.hooks.wandb import WandbLoggerHookV2
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
import torch.nn as nn
|
||||
from torch.utils.data import Dataset
|
||||
|
||||
from easycv.framework.errors import TypeError
|
||||
from easycv.utils.collect import dist_forward_collect, nondist_forward_collect
|
||||
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from mmcv import runner
|
||||
from mmcv.runner import HOOKS
|
||||
from mmcv.runner.hooks.lr_updater import (CosineAnnealingLrUpdaterHook,
|
||||
annealing_cos)
|
||||
@ -54,3 +55,29 @@ class StepFixCosineAnnealingLrUpdaterHook(CosineAnnealingLrUpdaterHook):
|
||||
target_lr = self.min_lr
|
||||
|
||||
return annealing_cos(base_lr, target_lr, progress / max_progress)
|
||||
|
||||
|
||||
@HOOKS.register_module()
|
||||
class CosineAnnealingWarmupByEpochLrUpdaterHook(CosineAnnealingLrUpdaterHook):
|
||||
|
||||
def before_train_iter(self, runner: 'runner.BaseRunner'):
|
||||
cur_iter = runner.iter
|
||||
epoch_len = len(runner.data_loader)
|
||||
assert isinstance(self.warmup_iters, int)
|
||||
if not self.by_epoch:
|
||||
self.regular_lr = self.get_regular_lr(runner)
|
||||
if self.warmup is None or cur_iter >= self.warmup_iters:
|
||||
self._set_lr(runner, self.regular_lr)
|
||||
else:
|
||||
if cur_iter % epoch_len == 0:
|
||||
warmup_lr = self.get_warmup_lr(cur_iter)
|
||||
self._set_lr(runner, warmup_lr)
|
||||
elif self.by_epoch:
|
||||
if self.warmup is None or cur_iter > self.warmup_iters:
|
||||
return
|
||||
elif cur_iter == self.warmup_iters:
|
||||
self._set_lr(runner, self.regular_lr)
|
||||
else:
|
||||
if cur_iter % epoch_len == 0:
|
||||
warmup_lr = self.get_warmup_lr(cur_iter)
|
||||
self._set_lr(runner, warmup_lr)
|
||||
|
@ -6,6 +6,7 @@ import torch
|
||||
from mmcv.parallel import is_module_wrapper
|
||||
from mmcv.runner import OptimizerHook as _OptimizerHook
|
||||
|
||||
from easycv.framework.errors import TypeError
|
||||
from easycv.utils.dist_utils import get_dist_info
|
||||
from easycv.utils.torchacc_util import is_torchacc_enabled
|
||||
|
||||
@ -134,7 +135,7 @@ class AMPFP16OptimizerHook(OptimizerHook):
|
||||
elif isinstance(loss_scale, dict):
|
||||
self.scaler = amp.GradScaler(**loss_scale)
|
||||
else:
|
||||
raise ValueError(
|
||||
raise TypeError(
|
||||
'`loss_scale` type must be in [float, dict], but got {loss_scale}'
|
||||
)
|
||||
|
||||
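A hypothetical config sketch of the two `loss_scale` forms accepted here; how the hook is referenced from optimizer_config is an assumption, while the dict form is simply unpacked into torch.cuda.amp.GradScaler as the code above shows.

# scalar loss scale
optimizer_config = dict(type='AMPFP16OptimizerHook', loss_scale=512.0)
# dict loss scale, forwarded as amp.GradScaler(**loss_scale)
optimizer_config = dict(
    type='AMPFP16OptimizerHook',
    loss_scale=dict(init_scale=2.**16, growth_interval=2000))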
|
@ -21,4 +21,5 @@ from .resnet_jit import ResNetJIT
|
||||
from .resnext import ResNeXt
|
||||
from .shuffle_transformer import ShuffleTransformer
|
||||
from .swin_transformer import SwinTransformer
|
||||
from .vision_transformer import VisionTransformer
|
||||
from .vitdet import ViTDet
|
||||
|
@ -10,7 +10,7 @@ from timm.models.layers import trunc_normal_
|
||||
from easycv.models.registry import BACKBONES
|
||||
from easycv.models.utils import DropPath
|
||||
from easycv.models.utils.pos_embed import get_2d_sincos_pos_embed
|
||||
from .vit_transfomer_dynamic import Block
|
||||
from .vision_transformer import Block
|
||||
|
||||
|
||||
class PatchEmbed(nn.Module):
|
||||
|
@ -7,6 +7,7 @@ from mmcv.cnn import (build_conv_layer, build_norm_layer, constant_init,
|
||||
normal_init)
|
||||
from torch.nn.modules.batchnorm import _BatchNorm
|
||||
|
||||
from easycv.framework.errors import NotImplementedError, TypeError, ValueError
|
||||
from easycv.models.registry import BACKBONES
|
||||
from ..modelzoo import hrnet as model_urls
|
||||
from .resnet import BasicBlock
|
||||
|
@ -11,6 +11,7 @@ from mmcv.cnn import (ConvModule, DepthwiseSeparableConvModule,
|
||||
normal_init)
|
||||
from torch.nn.modules.batchnorm import _BatchNorm
|
||||
|
||||
from easycv.framework.errors import ValueError
|
||||
from easycv.models.registry import BACKBONES
|
||||
|
||||
|
||||
|
@ -13,6 +13,7 @@ from mmcv.cnn.utils.weight_init import (constant_init, normal_init,
|
||||
trunc_normal_init)
|
||||
from mmcv.runner import BaseModule, ModuleList, Sequential
|
||||
|
||||
from easycv.framework.errors import TypeError
|
||||
from easycv.models.registry import BACKBONES
|
||||
from easycv.models.segmentation.utils import (PatchEmbed, nchw_to_nlc,
|
||||
nlc_to_nchw)
|
||||
|
@ -5,6 +5,7 @@ r""" This model is taken from the official PyTorch model zoo.
|
||||
|
||||
from torch import nn
|
||||
|
||||
from easycv.framework.errors import ValueError
|
||||
from ..modelzoo import mobilenetv2 as model_urls
|
||||
from ..registry import BACKBONES
|
||||
|
||||
|
@ -7,6 +7,7 @@ import torch.nn as nn
|
||||
from timm.models.helpers import load_pretrained
|
||||
from timm.models.hub import download_cached_file
|
||||
|
||||
from easycv.framework.errors import ValueError
|
||||
from easycv.utils.logger import get_root_logger, print_log
|
||||
from ..modelzoo import timm_models as model_urls
|
||||
from ..registry import BACKBONES
|
||||
@ -16,11 +17,11 @@ from .shuffle_transformer import (shuffletrans_base_p4_w7_224,
|
||||
from .swin_transformer_dynamic import (dynamic_swin_base_p4_w7_224,
|
||||
dynamic_swin_small_p4_w7_224,
|
||||
dynamic_swin_tiny_p4_w7_224)
|
||||
from .vit_transfomer_dynamic import (dynamic_deit_small_p16,
|
||||
dynamic_deit_tiny_p16,
|
||||
dynamic_vit_base_p16,
|
||||
dynamic_vit_huge_p14,
|
||||
dynamic_vit_large_p16)
|
||||
from .vit_transformer_dynamic import (dynamic_deit_small_p16,
|
||||
dynamic_deit_tiny_p16,
|
||||
dynamic_vit_base_p16,
|
||||
dynamic_vit_huge_p14,
|
||||
dynamic_vit_large_p16)
|
||||
from .xcit_transformer import (xcit_large_24_p8, xcit_medium_24_p8,
|
||||
xcit_medium_24_p16, xcit_small_12_p8,
|
||||
xcit_small_12_p16)
|
||||
@ -36,7 +37,7 @@ _MODEL_MAP = {
|
||||
'dynamic_swin_small_p4_w7_224': dynamic_swin_small_p4_w7_224,
|
||||
'dynamic_swin_base_p4_w7_224': dynamic_swin_base_p4_w7_224,
|
||||
|
||||
# vit_transfomer_dynamic
|
||||
# vit_transformer_dynamic
|
||||
'dynamic_deit_small_p16': dynamic_deit_small_p16,
|
||||
'dynamic_deit_tiny_p16': dynamic_deit_tiny_p16,
|
||||
'dynamic_vit_base_p16': dynamic_vit_base_p16,
|
||||
|
@ -14,6 +14,7 @@ import torch.nn.functional as F
|
||||
from torch.nn import Conv2d, Module, ReLU
|
||||
from torch.nn.modules.utils import _pair
|
||||
|
||||
from easycv.framework.errors import KeyError, NotImplementedError, RuntimeError
|
||||
from ..registry import BACKBONES
|
||||
|
||||
|
||||
|
@ -4,6 +4,7 @@ import torch.utils.checkpoint as cp
|
||||
from mmcv.cnn import constant_init, kaiming_init
|
||||
from torch.nn.modules.batchnorm import _BatchNorm
|
||||
|
||||
from easycv.framework.errors import KeyError
|
||||
from ..modelzoo import resnet as model_urls
|
||||
from ..registry import BACKBONES
|
||||
from ..utils import FReLU, build_conv_layer, build_norm_layer
|
||||
|
@ -6,6 +6,7 @@ import torch.nn as nn
|
||||
from mmcv.cnn import constant_init, kaiming_init
|
||||
from torch.nn.modules.batchnorm import _BatchNorm
|
||||
|
||||
from easycv.framework.errors import KeyError
|
||||
from ..registry import BACKBONES
|
||||
from ..utils import build_conv_layer, build_norm_layer
|
||||
|
||||
|
@ -7,6 +7,7 @@ from einops import rearrange
|
||||
from timm.models.layers import DropPath, trunc_normal_
|
||||
from torch import nn
|
||||
|
||||
from easycv.framework.errors import NotImplementedError
|
||||
from ..registry import BACKBONES
|
||||
|
||||
|
||||
|
283
easycv/models/backbones/vision_transformer.py
Normal file
@ -0,0 +1,283 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
"""
|
||||
Mostly copy-paste from timm library.
|
||||
https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
|
||||
|
||||
"""
|
||||
from functools import partial
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from timm.models.layers import trunc_normal_
|
||||
|
||||
from easycv.models.utils import DropPath, Mlp
|
||||
from ..registry import BACKBONES
|
||||
|
||||
|
||||
class Attention(nn.Module):
|
||||
|
||||
def __init__(self,
|
||||
dim,
|
||||
num_heads=8,
|
||||
qkv_bias=False,
|
||||
qk_scale=None,
|
||||
attn_drop=0.,
|
||||
proj_drop=0.):
|
||||
super().__init__()
|
||||
self.num_heads = num_heads
|
||||
head_dim = dim // num_heads
|
||||
self.scale = qk_scale or head_dim**-0.5
|
||||
|
||||
self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
|
||||
self.attn_drop = nn.Dropout(attn_drop)
|
||||
self.proj = nn.Linear(dim, dim)
|
||||
self.proj_drop = nn.Dropout(proj_drop)
|
||||
|
||||
def forward(self, x, rel_pos_bias=None):
|
||||
B, N, C = x.shape
|
||||
qkv = self.qkv(x).reshape(B, N, 3, self.num_heads,
|
||||
C // self.num_heads).permute(2, 0, 3, 1, 4)
|
||||
q, k, v = qkv[0], qkv[1], qkv[2]
|
||||
|
||||
attn = (q @ k.transpose(-2, -1)) * self.scale
|
||||
|
||||
if rel_pos_bias is not None:
|
||||
attn = attn + rel_pos_bias
|
||||
|
||||
attn = attn.softmax(dim=-1)
|
||||
attn = self.attn_drop(attn)
|
||||
|
||||
x = (attn @ v).transpose(1, 2).reshape(B, N, C)
|
||||
x = self.proj(x)
|
||||
x = self.proj_drop(x)
|
||||
return x, attn
|
||||
|
||||
|
||||
class Block(nn.Module):
|
||||
|
||||
def __init__(self,
|
||||
dim,
|
||||
num_heads,
|
||||
mlp_ratio=4.,
|
||||
qkv_bias=False,
|
||||
qk_scale=None,
|
||||
drop=0.,
|
||||
attn_drop=0.,
|
||||
drop_path=0.,
|
||||
act_layer=nn.GELU,
|
||||
norm_layer=nn.LayerNorm,
|
||||
use_layer_scale=False,
|
||||
init_values=1e-4):
|
||||
super().__init__()
|
||||
self.norm1 = norm_layer(dim)
|
||||
self.attn = Attention(
|
||||
dim,
|
||||
num_heads=num_heads,
|
||||
qkv_bias=qkv_bias,
|
||||
qk_scale=qk_scale,
|
||||
attn_drop=attn_drop,
|
||||
proj_drop=drop)
|
||||
self.drop_path = DropPath(
|
||||
drop_path) if drop_path > 0. else nn.Identity()
|
||||
self.norm2 = norm_layer(dim)
|
||||
mlp_hidden_dim = int(dim * mlp_ratio)
|
||||
self.mlp = Mlp(
|
||||
in_features=dim,
|
||||
hidden_features=mlp_hidden_dim,
|
||||
act_layer=act_layer,
|
||||
drop=drop)
|
||||
self.use_layer_scale = use_layer_scale
|
||||
if self.use_layer_scale:
|
||||
self.gamma_1 = nn.Parameter(
|
||||
init_values * torch.ones((dim)), requires_grad=True)
|
||||
self.gamma_2 = nn.Parameter(
|
||||
init_values * torch.ones((dim)), requires_grad=True)
|
||||
|
||||
def forward(self, x, return_attention=False, rel_pos_bias=None):
|
||||
y, attn = self.attn(self.norm1(x), rel_pos_bias=rel_pos_bias)
|
||||
if return_attention:
|
||||
return attn
|
||||
if self.use_layer_scale:
|
||||
x = x + self.drop_path(self.gamma_1 * y)
|
||||
x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x)))
|
||||
else:
|
||||
x = x + self.drop_path(y)
|
||||
x = x + self.drop_path(self.mlp(self.norm2(x)))
|
||||
return x
|
||||
|
||||
def forward_fea_and_attn(self, x):
|
||||
y, attn = self.attn(self.norm1(x))
|
||||
if self.use_layer_scale:
|
||||
x = x + self.drop_path(self.gamma_1 * y)
|
||||
x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x)))
|
||||
else:
|
||||
x = x + self.drop_path(y)
|
||||
x = x + self.drop_path(self.mlp(self.norm2(x)))
|
||||
return x, attn
|
||||
|
||||
|
||||
class PatchEmbed(nn.Module):
|
||||
""" Image to Patch Embedding
|
||||
"""
|
||||
|
||||
def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
|
||||
super().__init__()
|
||||
num_patches = (img_size // patch_size) * (img_size // patch_size)
|
||||
self.img_size = img_size
|
||||
self.patch_size = patch_size
|
||||
self.num_patches = num_patches
|
||||
|
||||
self.proj = nn.Conv2d(
|
||||
in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
|
||||
|
||||
def forward(self, x):
|
||||
B, C, H, W = x.shape
|
||||
x = self.proj(x).flatten(2).transpose(1, 2)
|
||||
return x
|
||||
|
||||
|
||||
@BACKBONES.register_module
|
||||
class VisionTransformer(nn.Module):
|
||||
""" DeiT III is based on ViT. It uses some strategies to make the vit model
|
||||
better, such as layer scale, stochastic depth and 3-Augment.
|
||||
|
||||
Paper link: https://arxiv.org/pdf/2204.07118.pdf (DeiT III: Revenge of the ViT)
|
||||
|
||||
Args:
|
||||
img_size (list): Input image size. img_size=[224] means the image size is
|
||||
224*224. img_size=[192, 224] means the image size is 192*224.
|
||||
patch_size (int): The patch size. Default: 16
|
||||
in_chans (int): The num of input channels. Default: 3
|
||||
num_classes (int): The number of image classes. Default: 1000
|
||||
embed_dim (int): The dimensions of embedding. Default: 768
|
||||
depth (int): The num of blocks. Default: 12
|
||||
num_heads (int): Parallel attention heads. Default: 12
|
||||
mlp_ratio (float): Mlp expansion ratio. Default: 4.0
|
||||
qkv_bias (bool): Whether the qkv projection uses a bias. Default: False
|
||||
qk_scale (float | None): In the step of self-attention, if qk_scale is not
|
||||
None, it will use qk_scale to scale the q @ k. Otherwise it will use
|
||||
head_dim**-0.5 instead of qk_scale. Default: None
|
||||
drop_rate (float): Probability of an element to be zeroed after the feed
|
||||
forward layer. Default: 0.0
|
||||
drop_path_rate (float): Stochastic depth rate. Default: 0
|
||||
norm_layer (nn.Module): normalization layer
|
||||
global_pool (bool): Global pool before head. Default: False
|
||||
use_layer_scale (bool): If use_layer_scale is True, it will use layer
|
||||
scale. Default: False
|
||||
init_scale (float): It is used for layer scale in Block to scale the
|
||||
gamma_1 and gamma_2.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
img_size=[224],
|
||||
patch_size=16,
|
||||
in_chans=3,
|
||||
num_classes=1000,
|
||||
embed_dim=768,
|
||||
depth=12,
|
||||
num_heads=12,
|
||||
mlp_ratio=4.,
|
||||
qkv_bias=False,
|
||||
qk_scale=None,
|
||||
drop_rate=0.,
|
||||
attn_drop_rate=0.,
|
||||
drop_path_rate=0.,
|
||||
norm_layer=partial(nn.LayerNorm, eps=1e-6),
|
||||
global_pool=False,
|
||||
use_layer_scale=False,
|
||||
init_scale=1e-4,
|
||||
**kwargs):
|
||||
super().__init__()
|
||||
|
||||
self.num_features = self.embed_dim = embed_dim
|
||||
self.num_heads = num_heads
|
||||
self.mlp_ratio = mlp_ratio
|
||||
self.qkv_bias = qkv_bias
|
||||
self.qk_scale = qk_scale
|
||||
self.drop_rate = drop_rate
|
||||
self.attn_drop_rate = attn_drop_rate
|
||||
self.norm_layer = norm_layer
|
||||
self.use_layer_scale = use_layer_scale
|
||||
self.init_scale = init_scale
|
||||
|
||||
self.patch_embed = PatchEmbed(
|
||||
img_size=img_size[0],
|
||||
patch_size=patch_size,
|
||||
in_chans=in_chans,
|
||||
embed_dim=embed_dim)
|
||||
num_patches = self.patch_embed.num_patches
|
||||
|
||||
self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
|
||||
self.pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim))
|
||||
self.pos_drop = nn.Dropout(p=drop_rate)
|
||||
|
||||
self.drop_path_rate = drop_path_rate
|
||||
self.depth = depth
|
||||
dpr = [drop_path_rate for i in range(depth)]
|
||||
self.blocks = nn.ModuleList([
|
||||
Block(
|
||||
dim=embed_dim,
|
||||
num_heads=num_heads,
|
||||
mlp_ratio=mlp_ratio,
|
||||
qkv_bias=qkv_bias,
|
||||
qk_scale=qk_scale,
|
||||
drop=drop_rate,
|
||||
attn_drop=attn_drop_rate,
|
||||
drop_path=dpr[i],
|
||||
norm_layer=norm_layer,
|
||||
use_layer_scale=use_layer_scale,
|
||||
init_values=init_scale) for i in range(depth)
|
||||
])
|
||||
self.norm = norm_layer(embed_dim)
|
||||
|
||||
# Classifier head
|
||||
self.head = nn.Linear(
|
||||
embed_dim, num_classes) if num_classes > 0 else nn.Identity()
|
||||
|
||||
# Use global average pooling
|
||||
self.global_pool = global_pool
|
||||
if self.global_pool:
|
||||
self.fc_norm = norm_layer(embed_dim)
|
||||
self.norm = None
|
||||
|
||||
def init_weights(self):
|
||||
trunc_normal_(self.pos_embed, std=.02)
|
||||
trunc_normal_(self.cls_token, std=.02)
|
||||
|
||||
for m in self.modules():
|
||||
if isinstance(m, nn.Linear):
|
||||
trunc_normal_(m.weight, std=.02)
|
||||
if isinstance(m, nn.Linear) and m.bias is not None:
|
||||
nn.init.constant_(m.bias, 0)
|
||||
elif isinstance(m, nn.LayerNorm):
|
||||
nn.init.constant_(m.bias, 0)
|
||||
nn.init.constant_(m.weight, 1.0)
|
||||
|
||||
def forward(self, x):
|
||||
|
||||
x = self.forward_features(x)
|
||||
x = self.pos_drop(x)
|
||||
x = self.head(x)
|
||||
|
||||
return [x]
|
||||
|
||||
def forward_features(self, x):
|
||||
B = x.shape[0]
|
||||
x = self.patch_embed(x)
|
||||
|
||||
cls_tokens = self.cls_token.expand(B, -1, -1)
|
||||
|
||||
x = x + self.pos_embed
|
||||
x = torch.cat((cls_tokens, x), dim=1)
|
||||
|
||||
for blk in self.blocks:
|
||||
x = blk(x)
|
||||
if self.norm is not None:
|
||||
x = self.norm(x)
|
||||
|
||||
if self.global_pool:
|
||||
x = x[:, 1:, :].mean(dim=1)
|
||||
return self.fc_norm(x)
|
||||
else:
|
||||
return x[:, 0]
|
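A short usage sketch (not part of the diff) for the new backbone; argument values are illustrative and the default 1000-class head is kept.

import torch
from easycv.models.backbones.vision_transformer import VisionTransformer

vit = VisionTransformer(
    img_size=[224], patch_size=16, embed_dim=768, depth=12, num_heads=12,
    qkv_bias=True, use_layer_scale=True, init_scale=1e-4)
vit.init_weights()
logits = vit(torch.randn(2, 3, 224, 224))[0]  # forward returns a single-element list
print(logits.shape)  # torch.Size([2, 1000])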
@ -12,198 +12,51 @@ from functools import partial
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from timm.models.layers import trunc_normal_
|
||||
|
||||
from easycv.models.utils import DropPath, Mlp
|
||||
from easycv.models.backbones.vision_transformer import Block, VisionTransformer
|
||||
|
||||
|
||||
class Attention(nn.Module):
|
||||
class DynamicVisionTransformer(VisionTransformer):
|
||||
"""Dynamic Vision Transformer
|
||||
|
||||
def __init__(self,
|
||||
dim,
|
||||
num_heads=8,
|
||||
qkv_bias=False,
|
||||
qk_scale=None,
|
||||
attn_drop=0.,
|
||||
proj_drop=0.):
|
||||
super().__init__()
|
||||
self.num_heads = num_heads
|
||||
head_dim = dim // num_heads
|
||||
self.scale = qk_scale or head_dim**-0.5
|
||||
Args:
|
||||
use_dense_prediction (bool): If use_dense_prediction is True, the global
|
||||
pool and the norm before the head (if any) will be removed. Default: False
|
||||
|
||||
self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
|
||||
self.attn_drop = nn.Dropout(attn_drop)
|
||||
self.proj = nn.Linear(dim, dim)
|
||||
self.proj_drop = nn.Dropout(proj_drop)
|
||||
|
||||
def forward(self, x, rel_pos_bias=None):
|
||||
B, N, C = x.shape
|
||||
qkv = self.qkv(x).reshape(B, N, 3, self.num_heads,
|
||||
C // self.num_heads).permute(2, 0, 3, 1, 4)
|
||||
q, k, v = qkv[0], qkv[1], qkv[2]
|
||||
|
||||
attn = (q @ k.transpose(-2, -1)) * self.scale
|
||||
|
||||
if rel_pos_bias is not None:
|
||||
attn = attn + rel_pos_bias
|
||||
|
||||
attn = attn.softmax(dim=-1)
|
||||
attn = self.attn_drop(attn)
|
||||
|
||||
x = (attn @ v).transpose(1, 2).reshape(B, N, C)
|
||||
x = self.proj(x)
|
||||
x = self.proj_drop(x)
|
||||
return x, attn
|
||||
|
||||
|
||||
class Block(nn.Module):
|
||||
|
||||
def __init__(self,
|
||||
dim,
|
||||
num_heads,
|
||||
mlp_ratio=4.,
|
||||
qkv_bias=False,
|
||||
qk_scale=None,
|
||||
drop=0.,
|
||||
attn_drop=0.,
|
||||
drop_path=0.,
|
||||
act_layer=nn.GELU,
|
||||
norm_layer=nn.LayerNorm):
|
||||
super().__init__()
|
||||
self.norm1 = norm_layer(dim)
|
||||
self.attn = Attention(
|
||||
dim,
|
||||
num_heads=num_heads,
|
||||
qkv_bias=qkv_bias,
|
||||
qk_scale=qk_scale,
|
||||
attn_drop=attn_drop,
|
||||
proj_drop=drop)
|
||||
self.drop_path = DropPath(
|
||||
drop_path) if drop_path > 0. else nn.Identity()
|
||||
self.norm2 = norm_layer(dim)
|
||||
mlp_hidden_dim = int(dim * mlp_ratio)
|
||||
self.mlp = Mlp(
|
||||
in_features=dim,
|
||||
hidden_features=mlp_hidden_dim,
|
||||
act_layer=act_layer,
|
||||
drop=drop)
|
||||
|
||||
def forward(self, x, return_attention=False, rel_pos_bias=None):
|
||||
y, attn = self.attn(self.norm1(x), rel_pos_bias=rel_pos_bias)
|
||||
if return_attention:
|
||||
return attn
|
||||
x = x + self.drop_path(y)
|
||||
x = x + self.drop_path(self.mlp(self.norm2(x)))
|
||||
return x
|
||||
|
||||
def forward_fea_and_attn(self, x):
|
||||
y, attn = self.attn(self.norm1(x))
|
||||
x = x + self.drop_path(y)
|
||||
x = x + self.drop_path(self.mlp(self.norm2(x)))
|
||||
return x, attn
|
||||
|
||||
|
||||
class PatchEmbed(nn.Module):
|
||||
""" Image to Patch Embedding
|
||||
"""
|
||||
|
||||
def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
|
||||
super().__init__()
|
||||
num_patches = (img_size // patch_size) * (img_size // patch_size)
|
||||
self.img_size = img_size
|
||||
self.patch_size = patch_size
|
||||
self.num_patches = num_patches
|
||||
def __init__(self, use_dense_prediction=False, **kwargs):
|
||||
super(DynamicVisionTransformer, self).__init__(**kwargs)
|
||||
|
||||
self.proj = nn.Conv2d(
|
||||
in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
|
||||
|
||||
def forward(self, x):
|
||||
B, C, H, W = x.shape
|
||||
x = self.proj(x).flatten(2).transpose(1, 2)
|
||||
return x
|
||||
|
||||
|
||||
class DynamicVisionTransformer(nn.Module):
|
||||
"""Dynamic Vision Transformer """
|
||||
|
||||
def __init__(self,
|
||||
img_size=[224],
|
||||
patch_size=16,
|
||||
in_chans=3,
|
||||
num_classes=0,
|
||||
embed_dim=768,
|
||||
depth=12,
|
||||
num_heads=12,
|
||||
mlp_ratio=4.,
|
||||
qkv_bias=False,
|
||||
qk_scale=None,
|
||||
drop_rate=0.,
|
||||
attn_drop_rate=0.,
|
||||
drop_path_rate=0.,
|
||||
norm_layer=nn.LayerNorm,
|
||||
use_dense_prediction=False,
|
||||
global_pool=False,
|
||||
**kwargs):
|
||||
super().__init__()
|
||||
self.num_features = self.embed_dim = embed_dim
|
||||
|
||||
self.patch_embed = PatchEmbed(
|
||||
img_size=img_size[0],
|
||||
patch_size=patch_size,
|
||||
in_chans=in_chans,
|
||||
embed_dim=embed_dim)
|
||||
num_patches = self.patch_embed.num_patches
|
||||
|
||||
self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
|
||||
self.pos_embed = nn.Parameter(
|
||||
torch.zeros(1, num_patches + 1, embed_dim))
|
||||
self.pos_drop = nn.Dropout(p=drop_rate)
|
||||
torch.zeros(1, num_patches + 1, self.embed_dim))
|
||||
|
||||
dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)
|
||||
] # stochastic depth decay rule
|
||||
dpr = [
|
||||
x.item()
|
||||
for x in torch.linspace(0, self.drop_path_rate, self.depth)
|
||||
]
|
||||
self.blocks = nn.ModuleList([
|
||||
Block(
|
||||
dim=embed_dim,
|
||||
num_heads=num_heads,
|
||||
mlp_ratio=mlp_ratio,
|
||||
qkv_bias=qkv_bias,
|
||||
qk_scale=qk_scale,
|
||||
drop=drop_rate,
|
||||
attn_drop=attn_drop_rate,
|
||||
dim=self.embed_dim,
|
||||
num_heads=self.num_heads,
|
||||
mlp_ratio=self.mlp_ratio,
|
||||
qkv_bias=self.qkv_bias,
|
||||
qk_scale=self.qk_scale,
|
||||
drop=self.drop_rate,
|
||||
attn_drop=self.attn_drop_rate,
|
||||
drop_path=dpr[i],
|
||||
norm_layer=norm_layer) for i in range(depth)
|
||||
norm_layer=self.norm_layer,
|
||||
use_layer_scale=self.use_layer_scale,
|
||||
init_values=self.init_scale) for i in range(self.depth)
|
||||
])
|
||||
self.norm = norm_layer(embed_dim)
|
||||
|
||||
# Classifier head
|
||||
self.head = nn.Linear(
|
||||
embed_dim, num_classes) if num_classes > 0 else nn.Identity()
|
||||
|
||||
# Dense prediction head
|
||||
self.use_dense_prediction = use_dense_prediction
|
||||
if self.use_dense_prediction:
|
||||
self.head_dense = None
|
||||
|
||||
|
||||
# Use global average pooling
|
||||
self.global_pool = global_pool
|
||||
if self.global_pool:
|
||||
self.fc_norm = norm_layer(embed_dim)
|
||||
self.norm = None
|
||||
|
||||
trunc_normal_(self.pos_embed, std=.02)
|
||||
trunc_normal_(self.cls_token, std=.02)
|
||||
|
||||
def init_weights(self):
|
||||
for m in self.modules():
|
||||
if isinstance(m, nn.Linear):
|
||||
trunc_normal_(m.weight, std=.02)
|
||||
if isinstance(m, nn.Linear) and m.bias is not None:
|
||||
nn.init.constant_(m.bias, 0)
|
||||
elif isinstance(m, nn.LayerNorm):
|
||||
nn.init.constant_(m.bias, 0)
|
||||
nn.init.constant_(m.weight, 1.0)
|
||||
|
||||
def forward(self, x):
|
||||
# convert to list
|
||||
if not isinstance(x, list):
|
File diff suppressed because it is too large
@ -19,6 +19,7 @@ import torch.nn as nn
|
||||
from timm.models.layers import DropPath, to_2tuple, trunc_normal_
|
||||
from timm.models.vision_transformer import Mlp, _cfg
|
||||
|
||||
from easycv.framework.errors import ValueError
|
||||
from ..registry import BACKBONES
|
||||
|
||||
|
||||
@ -109,7 +110,7 @@ class ConvPatchEmbed(nn.Module):
|
||||
conv3x3(embed_dim // 2, embed_dim, 2),
|
||||
)
|
||||
else:
|
||||
raise (
|
||||
raise ValueError(
|
||||
'For convolutional projection, patch size has to be in [8, 16]'
|
||||
)
|
||||
|
||||
|
@ -8,6 +8,8 @@ import torch.distributed as dist
|
||||
import torch.nn as nn
|
||||
from torch import Tensor
|
||||
|
||||
from easycv.framework.errors import NotImplementedError, TypeError
|
||||
|
||||
|
||||
class BaseModel(nn.Module, metaclass=ABCMeta):
|
||||
''' base class for model. '''
|
||||
|
@ -7,6 +7,7 @@ import torch.nn as nn
|
||||
from mmcv.runner import get_dist_info
|
||||
from timm.data.mixup import Mixup
|
||||
|
||||
from easycv.framework.errors import KeyError, NotImplementedError, ValueError
|
||||
from easycv.utils.checkpoint import load_checkpoint
|
||||
from easycv.utils.logger import get_root_logger, print_log
|
||||
from easycv.utils.preprocess_function import (bninceptionPre, gaussianBlur,
|
||||
@ -53,22 +54,15 @@ class Classification(BaseModel):
|
||||
if 'mixUp' in train_preprocess:
|
||||
rank, _ = get_dist_info()
|
||||
np.random.seed(rank + 12)
|
||||
if not mixup_cfg:
|
||||
num_classes = head.get(
|
||||
'num_classes',
|
||||
1000) if 'num_classes' in head else backbone.get(
|
||||
'num_classes', 1000)
|
||||
mixup_cfg = dict(
|
||||
mixup_alpha=0.8,
|
||||
cutmix_alpha=1.0,
|
||||
cutmix_minmax=None,
|
||||
prob=1.0,
|
||||
switch_prob=0.5,
|
||||
mode='batch',
|
||||
label_smoothing=0.1,
|
||||
num_classes=num_classes)
|
||||
self.mixup = Mixup(**mixup_cfg)
|
||||
head.loss_config = {'type': 'SoftTargetCrossEntropy'}
|
||||
if mixup_cfg is not None:
|
||||
if 'num_classes' in mixup_cfg:
|
||||
self.mixup = Mixup(**mixup_cfg)
|
||||
elif 'num_classes' in head or 'num_classes' in backbone:
|
||||
num_classes = head.get(
|
||||
'num_classes'
|
||||
) if 'num_classes' in head else backbone.get('num_classes')
|
||||
mixup_cfg['num_classes'] = num_classes
|
||||
self.mixup = Mixup(**mixup_cfg)
|
||||
train_preprocess.remove('mixUp')
|
||||
self.train_preprocess = [
|
||||
self.preprocess_key_map[i] for i in train_preprocess
|
||||
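For reference, a hedged config sketch of how an explicit mixup_cfg now interacts with num_classes; the backbone and head entries are placeholders, not part of this commit.

model = dict(
    type='Classification',
    train_preprocess=['mixUp'],
    # num_classes may be set here, or it is taken from head/backbone as above
    mixup_cfg=dict(
        mixup_alpha=0.8, cutmix_alpha=1.0, cutmix_minmax=None, prob=1.0,
        switch_prob=0.5, mode='batch', label_smoothing=0.1, num_classes=1000),
    backbone=dict(type='ResNet', depth=50, out_indices=[4], norm_cfg=dict(type='BN')),
    head=dict(type='ClsHead', with_avg_pool=True, in_channels=2048, num_classes=1000))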
@ -173,7 +167,10 @@ class Classification(BaseModel):
|
||||
for preprocess in self.train_preprocess:
|
||||
img = preprocess(img)
|
||||
|
||||
if hasattr(self, 'mixup'):
|
||||
# When the number of samples in the dataset is odd, the last batch size of each epoch will be odd,
|
||||
# which will cause mixup to report an error. To avoid this situation, mixup is applied only when
|
||||
# the batch size is even.
|
||||
if hasattr(self, 'mixup') and len(img) % 2 == 0:
|
||||
img, gt_labels = self.mixup(img, gt_labels)
|
||||
|
||||
x = self.forward_backbone(img)
|
||||
@ -304,4 +301,4 @@ class Classification(BaseModel):
|
||||
rv['gt_labels'] = gt_labels.cpu()
|
||||
return rv
|
||||
else:
|
||||
raise Exception('No such mode: {}'.format(mode))
|
||||
raise KeyError('No such mode: {}'.format(mode))
|
||||
|
@ -29,6 +29,8 @@ from torch.nn.init import constant_
|
||||
from torch.nn.modules.linear import Linear
|
||||
from torch.nn.modules.module import Module
|
||||
|
||||
from easycv.framework.errors import RuntimeError
|
||||
|
||||
try:
|
||||
from torch.overrides import has_torch_function, handle_torch_function
|
||||
except:
|
||||
|
@ -14,6 +14,7 @@ import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import Tensor, nn
|
||||
|
||||
from easycv.framework.errors import NotImplementedError, ValueError
|
||||
from easycv.models.builder import NECKS
|
||||
from easycv.models.detection.utils import inverse_sigmoid
|
||||
from easycv.models.utils import (MLP, TransformerEncoder,
|
||||
|
@ -1,4 +1,5 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
from easycv.framework.errors import ValueError
|
||||
from easycv.models.base import BaseModel
|
||||
from easycv.models.builder import (MODELS, build_backbone, build_head,
|
||||
build_neck)
|
||||
|
Some files were not shown because too many files have changed in this diff