Merge branch 'master_github'

wenmeng.zwm 2022-09-20 19:59:05 +08:00
commit 608949b936
175 changed files with 3109 additions and 1978 deletions

3
.gitignore vendored
View File

@ -137,6 +137,3 @@ pai_jobs/easycv/resources/
*.tar.gz
thirdparty/test
scripts/test
# easycv default cache dir
.easycv_cache

View File

@ -10,7 +10,7 @@ oss_io_config = dict(
buckets=['your oss buckets'])
# model settings
# 1920: merge 4 layers of features, open models/backbones/vit_transfomer_dynamic.py:311: self.forward_return_n_last_blocks
# 1920: merge 4 layers of features, open models/backbones/vit_transformer_dynamic.py:311: self.forward_return_n_last_blocks
# 384: default
feature_num = 1920
model = dict(

View File

@ -157,3 +157,6 @@ checkpoint_config = dict(interval=10)
# runtime settings
total_epochs = 50
# export config
export = dict(export_neck=True)

View File

@ -10,7 +10,7 @@ oss_io_config = dict(
buckets=['your oss buckets'])
# model settings
# 1920: merge 4 layers of features, open models/backbones/vit_transfomer_dynamic.py:311: self.forward_return_n_last_blocks
# 1920: merge 4 layers of features, open models/backbones/vit_transformer_dynamic.py:311: self.forward_return_n_last_blocks
# 384: default
feature_num = 1920
model = dict(

View File

@ -15,6 +15,7 @@ from mmcv.runner import get_dist_info, init_dist, load_checkpoint
from easycv.apis import set_random_seed
from easycv.datasets import build_dataloader, build_dataset
from easycv.file import io
from easycv.framework.errors import ValueError
from easycv.models import build_model
from easycv.utils.collect import dist_forward_collect, nondist_forward_collect
from easycv.utils.config_tools import mmcv_config_fromfile

View File

@ -3,6 +3,8 @@ import argparse
import torch
from easycv.framework.errors import ValueError
def parse_args():
parser = argparse.ArgumentParser(
@ -24,7 +26,7 @@ def main():
output_dict['state_dict'][key[9:]] = value
has_backbone = True
if not has_backbone:
raise Exception('Cannot find a backbone module in the checkpoint.')
raise ValueError('Cannot find a backbone module in the checkpoint.')
torch.save(output_dict, args.output)
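For context, a minimal sketch of the loop around the changed line (reconstructed for illustration, not the exact script; the helper name is hypothetical): keys starting with 'backbone.' are copied into a fresh state dict with the 9-character prefix stripped via key[9:], and the EasyCV ValueError is raised when no such key is found.

import torch
from easycv.framework.errors import ValueError

def extract_backbone(checkpoint_path, output_path):
    ckpt = torch.load(checkpoint_path, map_location='cpu')
    output_dict = {'state_dict': {}}
    has_backbone = False
    for key, value in ckpt['state_dict'].items():
        if key.startswith('backbone.'):
            # len('backbone.') == 9, so key[9:] drops the prefix
            output_dict['state_dict'][key[9:]] = value
            has_backbone = True
    if not has_backbone:
        raise ValueError('Cannot find a backbone module in the checkpoint.')
    torch.save(output_dict, output_path)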

View File

@ -2,11 +2,12 @@
import argparse
import os
import shutil
import sys
import time
import torch
from easycv.framework.errors import ValueError
args = argparse.ArgumentParser(description='Process some integers.')
args.add_argument(
'model_path',
@ -88,7 +89,7 @@ def extract_model(model_path):
output_dict['state_dict'][key[9:]] = value
has_backbone = True
if not has_backbone:
raise Exception('Cannot find a backbone module in the checkpoint.')
raise ValueError('Cannot find a backbone module in the checkpoint.')
torch.save(output_dict, backbone_file)
return backbone_file

View File

@ -86,3 +86,13 @@ checkpoint_config = dict(interval=10)
# runtime settings
total_epochs = 100
predict = dict(
type='ClassificationPredictor',
pipelines=[
dict(type='Resize', size=256),
dict(type='CenterCrop', size=224),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Collect', keys=['img'])
])
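Roughly how a model exported with a predict config like this can be called (a sketch that reuses the TorchClassifier API shown verbatim in the tutorial notebook later in this commit; the checkpoint and label-map paths are placeholders):

import cv2
from easycv.predictors.classifier import TorchClassifier

tcls = TorchClassifier('best_export.pth', topk=1, label_map_path='label_map.txt')
img = cv2.imread('test.jpg')
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # the predictor expects RGB input
print(tcls.predict([img]))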

View File

@ -0,0 +1,143 @@
# from PIL import Image
_base_ = 'configs/base.py'
log_config = dict(
interval=10,
hooks=[dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')])
# model settings
model = dict(
type='Classification',
train_preprocess=['mixUp'],
pretrained=False,
mixup_cfg=dict(
mixup_alpha=0.8,
cutmix_alpha=1.0,
cutmix_minmax=None,
prob=1.0,
switch_prob=0.5,
mode='batch',
label_smoothing=0.0,
num_classes=1000),
backbone=dict(
type='VisionTransformer',
img_size=[192],
num_classes=1000,
patch_size=16,
embed_dim=768,
depth=12,
num_heads=12,
mlp_ratio=4,
qkv_bias=True,
drop_rate=0.,
drop_path_rate=0.2,
use_layer_scale=True),
head=dict(
type='ClsHead',
loss_config=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
loss_weight=1.0,
label_ceil=True),
with_fc=False,
use_num_classes=False))
data_train_list = 'data/imagenet1k/train.txt'
data_train_root = 'data/imagenet1k/train/'
data_test_list = 'data/imagenet1k/val.txt'
data_test_root = 'data/imagenet1k/val/'
dataset_type = 'ClsDataset'
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
three_augment_policies = [[
dict(type='PILGaussianBlur', prob=1.0, radius_min=0.1, radius_max=2.0),
], [
dict(type='Solarization', threshold=128),
], [
dict(type='Grayscale', num_output_channels=3),
]]
train_pipeline = [
dict(
type='RandomResizedCrop', size=192, scale=(0.08, 1.0),
interpolation=3), # interpolation='bicubic'
dict(type='RandomHorizontalFlip'),
dict(type='MMAutoAugment', policies=three_augment_policies),
dict(type='ColorJitter', brightness=0.3, contrast=0.3, saturation=0.3),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Collect', keys=['img', 'gt_labels'])
]
size = int((256 / 224) * 192)
test_pipeline = [
dict(type='Resize', size=size, interpolation=3),
dict(type='CenterCrop', size=192),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Collect', keys=['img', 'gt_labels'])
]
data = dict(
imgs_per_gpu=256,
workers_per_gpu=8,
use_repeated_augment_sampler=True,
train=dict(
type=dataset_type,
data_source=dict(
list_file=data_train_list,
root=data_train_root,
type='ClsSourceImageList'),
pipeline=train_pipeline),
val=dict(
type=dataset_type,
data_source=dict(
list_file=data_test_list,
root=data_test_root,
type='ClsSourceImageList'),
pipeline=test_pipeline))
eval_config = dict(initial=True, interval=1, gpu_collect=True)
eval_pipelines = [
dict(
mode='test',
data=data['val'],
dist_eval=True,
evaluators=[dict(type='ClsEvaluator', topk=(1, 5))],
)
]
# additional hooks
custom_hooks = []
# optimizer
optimizer = dict(
type='Lamb',
lr=0.003,
weight_decay=0.05,
eps=1e-8,
paramwise_options={
'cls_token': dict(weight_decay=0.),
'pos_embed': dict(weight_decay=0.),
'bias': dict(weight_decay=0.),
'norm': dict(weight_decay=0.),
'gamma_1': dict(weight_decay=0.),
'gamma_2': dict(weight_decay=0.),
})
optimizer_config = dict(grad_clip=None, update_interval=1)
lr_config = dict(
policy='CosineAnnealingWarmupByEpoch',
by_epoch=True,
min_lr_ratio=0.00001 / 0.003,
warmup='linear',
warmup_by_epoch=True,
warmup_iters=5,
warmup_ratio=0.000001 / 0.003,
)
checkpoint_config = dict(interval=10)
# runtime settings
total_epochs = 800
ema = dict(decay=0.99996)
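A quick sanity check on the schedule above, assuming mmcv's usual convention that warmup_ratio and min_lr_ratio multiply the base lr:

base_lr = 0.003
min_lr = (0.00001 / 0.003) * base_lr            # = 1e-5, the cosine floor
warmup_start_lr = (0.000001 / 0.003) * base_lr  # = 1e-6, start of the 5-epoch linear warmup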

View File

@ -0,0 +1,17 @@
_base_ = './deitiii_base_patch16_192.py'
# model settings
model = dict(
type='Classification',
backbone=dict(
type='VisionTransformer',
img_size=[192],
num_classes=1000,
patch_size=16,
embed_dim=768,
depth=12,
num_heads=12,
mlp_ratio=4,
qkv_bias=True,
drop_rate=0.,
drop_path_rate=0.2,
use_layer_scale=True))

View File

@ -0,0 +1,17 @@
_base_ = './deitiii_base_patch16_192.py'
# model settings
model = dict(
type='Classification',
backbone=dict(
type='VisionTransformer',
img_size=[192],
num_classes=1000,
patch_size=16,
embed_dim=1024,
depth=24,
num_heads=16,
mlp_ratio=4,
qkv_bias=True,
drop_rate=0.,
drop_path_rate=0.45,
use_layer_scale=True))

View File

@ -0,0 +1,86 @@
_base_ = './deitiii_base_patch16_192.py'
# model settings
model = dict(
type='Classification',
backbone=dict(
type='VisionTransformer',
img_size=[224],
num_classes=1000,
patch_size=16,
embed_dim=384,
depth=12,
num_heads=6,
mlp_ratio=4,
qkv_bias=True,
drop_rate=0.,
drop_path_rate=0.05,
use_layer_scale=True))
data_train_list = 'data/imagenet1k/train.txt'
data_train_root = 'data/imagenet1k/train/'
data_test_list = 'data/imagenet1k/val.txt'
data_test_root = 'data/imagenet1k/val/'
dataset_type = 'ClsDataset'
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
three_augment_policies = [[
dict(type='PILGaussianBlur', prob=1.0, radius_min=0.1, radius_max=2.0),
], [
dict(type='Solarization', threshold=128),
], [
dict(type='Grayscale', num_output_channels=3),
]]
train_pipeline = [
dict(
type='RandomResizedCrop', size=224, scale=(0.08, 1.0),
interpolation=3), # interpolation='bicubic'
dict(type='RandomHorizontalFlip'),
dict(type='MMAutoAugment', policies=three_augment_policies),
dict(type='ColorJitter', brightness=0.3, contrast=0.3, saturation=0.3),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Collect', keys=['img', 'gt_labels'])
]
test_pipeline = [
dict(type='Resize', size=256, interpolation=3),
dict(type='CenterCrop', size=224),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Collect', keys=['img', 'gt_labels'])
]
data = dict(
imgs_per_gpu=256,
workers_per_gpu=8,
use_repeated_augment_sampler=True,
train=dict(
type=dataset_type,
data_source=dict(
list_file=data_train_list,
root=data_train_root,
type='ClsSourceImageList'),
pipeline=train_pipeline),
val=dict(
type=dataset_type,
data_source=dict(
list_file=data_test_list,
root=data_test_root,
type='ClsSourceImageList'),
pipeline=test_pipeline))
eval_pipelines = [
dict(
mode='test',
data=data['val'],
dist_eval=True,
evaluators=[dict(type='ClsEvaluator', topk=(1, 5))],
)
]
# optimizer
optimizer = dict(lr=0.004)
lr_config = dict(
min_lr_ratio=0.00001 / 0.004,
warmup_ratio=0.000001 / 0.004,
)

View File

@ -101,13 +101,15 @@ val_dataset = dict(
pipeline=test_pipeline)
data = dict(
imgs_per_gpu=1, workers_per_gpu=2, train=train_dataset, val=val_dataset)
imgs_per_gpu=4, workers_per_gpu=2, train=train_dataset, val=val_dataset
) # 64 (total batch size) = 4 (batch size per gpu) x 8 (gpu num) x 2 (node)
# evaluation
eval_config = dict(interval=1, gpu_collect=False)
eval_config = dict(initial=False, interval=1, gpu_collect=False)
eval_pipelines = [
dict(
mode='test',
# dist_eval=True,
evaluators=[
dict(type='CocoDetectionEvaluator', classes=CLASSES),
],

View File

@ -101,13 +101,15 @@ val_dataset = dict(
pipeline=test_pipeline)
data = dict(
imgs_per_gpu=1, workers_per_gpu=2, train=train_dataset, val=val_dataset)
imgs_per_gpu=4, workers_per_gpu=2, train=train_dataset, val=val_dataset
) # 64 (total batch size) = 4 (batch size per gpu) x 8 (gpu num) x 2 (node)
# evaluation
eval_config = dict(interval=1, gpu_collect=False)
eval_config = dict(initial=False, interval=1, gpu_collect=False)
eval_pipelines = [
dict(
mode='test',
# dist_eval=True,
evaluators=[
dict(type='CocoDetectionEvaluator', classes=CLASSES),
dict(type='CocoMaskEvaluator', classes=CLASSES)

View File

@ -1,3 +0,0 @@
_base_ = './vitdet_100e.py'
model = dict(backbone=dict(aggregation='basicblock'))

View File

@ -1,3 +0,0 @@
_base_ = './vitdet_100e.py'
model = dict(backbone=dict(aggregation='bottleneck'))

View File

@ -0,0 +1,231 @@
# model settings
norm_cfg = dict(type='GN', num_groups=1, eps=1e-6, requires_grad=True)
pretrained = 'https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-b-1600/warpper_mae_vit-base-p16-1600e.pth'
model = dict(
type='CascadeRCNN',
pretrained=pretrained,
backbone=dict(
type='ViTDet',
img_size=1024,
patch_size=16,
embed_dim=768,
depth=12,
num_heads=12,
drop_path_rate=0.1,
window_size=14,
mlp_ratio=4,
qkv_bias=True,
window_block_indexes=[
# 2, 5, 8, 11 for global attention
0,
1,
3,
4,
6,
7,
9,
10,
],
residual_block_indexes=[],
use_rel_pos=True),
neck=dict(
type='SFP',
in_channels=768,
out_channels=256,
scale_factors=(4.0, 2.0, 1.0, 0.5),
norm_cfg=norm_cfg,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
num_convs=2,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
roi_head=dict(
type='CascadeRoIHead',
num_stages=3,
stage_loss_weights=[1, 0.5, 0.25],
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=[
dict(
type='Shared4Conv1FCBBoxHead',
conv_out_channels=256,
norm_cfg=norm_cfg,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=True,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
loss_weight=1.0)),
dict(
type='Shared4Conv1FCBBoxHead',
conv_out_channels=256,
norm_cfg=norm_cfg,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.05, 0.05, 0.1, 0.1]),
reg_class_agnostic=True,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
loss_weight=1.0)),
dict(
type='Shared4Conv1FCBBoxHead',
conv_out_channels=256,
norm_cfg=norm_cfg,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.033, 0.033, 0.067, 0.067]),
reg_class_agnostic=True,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
],
mask_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
mask_head=dict(
type='FCNMaskHead',
norm_cfg=norm_cfg,
num_convs=4,
in_channels=256,
conv_out_channels=256,
num_classes=80,
loss_mask=dict(
type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
# model training and testing settings
train_cfg=dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=0,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_pre=2000,
max_per_img=2000,
nms=dict(type='nms', iou_threshold=0.7),
min_bbox_size=0),
rcnn=[
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.6,
neg_iou_thr=0.6,
min_pos_iou=0.6,
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.7,
min_pos_iou=0.7,
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False)
]),
test_cfg=dict(
rpn=dict(
nms_pre=1000,
max_per_img=1000,
nms=dict(type='nms', iou_threshold=0.7),
min_bbox_size=0),
rcnn=dict(
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.5),
max_per_img=100,
mask_thr_binary=0.5)))
mmlab_modules = [
dict(type='mmdet', name='CascadeRCNN', module='model'),
dict(type='mmdet', name='RPNHead', module='head'),
dict(type='mmdet', name='CascadeRoIHead', module='head'),
]

View File

@ -0,0 +1,4 @@
_base_ = [
'./vitdet_cascade_mask_rcnn.py', './lsj_coco_instance.py',
'./vitdet_schedule_100e.py'
]

View File

@ -1,6 +1,6 @@
# model settings
norm_cfg = dict(type='GN', num_groups=1, requires_grad=True)
norm_cfg = dict(type='GN', num_groups=1, eps=1e-6, requires_grad=True)
pretrained = 'https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-b-1600/warpper_mae_vit-base-p16-1600e.pth'
model = dict(
@ -9,22 +9,32 @@ model = dict(
backbone=dict(
type='ViTDet',
img_size=1024,
patch_size=16,
embed_dim=768,
depth=12,
num_heads=12,
drop_path_rate=0.1,
window_size=14,
mlp_ratio=4,
qkv_bias=True,
qk_scale=None,
drop_rate=0.,
attn_drop_rate=0.,
drop_path_rate=0.1,
use_abs_pos_emb=True,
aggregation='attn',
),
window_block_indexes=[
# 2, 5, 8, 11 for global attention
0,
1,
3,
4,
6,
7,
9,
10,
],
residual_block_indexes=[],
use_rel_pos=True),
neck=dict(
type='SFP',
in_channels=[768, 768, 768, 768],
in_channels=768,
out_channels=256,
scale_factors=(4.0, 2.0, 1.0, 0.5),
norm_cfg=norm_cfg,
num_outs=5),
rpn_head=dict(
@ -32,7 +42,6 @@ model = dict(
in_channels=256,
feat_channels=256,
num_convs=2,
norm_cfg=norm_cfg,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
@ -98,7 +107,7 @@ model = dict(
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=True,
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',

View File

@ -1,4 +1,4 @@
_base_ = [
'./vitdet_faster_rcnn.py', './lsj_coco_detection.py',
'./vitdet_faster_rcnn.py', './lsj_coco_instance.py',
'./vitdet_schedule_100e.py'
]

View File

@ -1,6 +1,6 @@
# model settings
norm_cfg = dict(type='GN', num_groups=1, requires_grad=True)
norm_cfg = dict(type='GN', num_groups=1, eps=1e-6, requires_grad=True)
pretrained = 'https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-b-1600/warpper_mae_vit-base-p16-1600e.pth'
model = dict(
@ -9,22 +9,32 @@ model = dict(
backbone=dict(
type='ViTDet',
img_size=1024,
patch_size=16,
embed_dim=768,
depth=12,
num_heads=12,
drop_path_rate=0.1,
window_size=14,
mlp_ratio=4,
qkv_bias=True,
qk_scale=None,
drop_rate=0.,
attn_drop_rate=0.,
drop_path_rate=0.1,
use_abs_pos_emb=True,
aggregation='attn',
),
window_block_indexes=[
# 2, 5, 8, 11 for global attention
0,
1,
3,
4,
6,
7,
9,
10,
],
residual_block_indexes=[],
use_rel_pos=True),
neck=dict(
type='SFP',
in_channels=[768, 768, 768, 768],
in_channels=768,
out_channels=256,
scale_factors=(4.0, 2.0, 1.0, 0.5),
norm_cfg=norm_cfg,
num_outs=5),
rpn_head=dict(
@ -32,7 +42,6 @@ model = dict(
in_channels=256,
feat_channels=256,
num_convs=2,
norm_cfg=norm_cfg,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
@ -112,7 +121,7 @@ model = dict(
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=True,
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',

View File

@ -1,26 +1,29 @@
_base_ = 'configs/base.py'
log_config = dict(
interval=200,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
checkpoint_config = dict(interval=10)
# optimizer
paramwise_options = {
'norm': dict(weight_decay=0.),
'bias': dict(weight_decay=0.),
'pos_embed': dict(weight_decay=0.),
'cls_token': dict(weight_decay=0.)
}
optimizer = dict(
type='AdamW',
lr=1e-4,
betas=(0.9, 0.999),
weight_decay=0.1,
paramwise_options=paramwise_options)
optimizer_config = dict(grad_clip=None, loss_scale=512.)
constructor='LayerDecayOptimizerConstructor',
paramwise_options=dict(num_layers=12, layer_decay_rate=0.7))
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=250,
warmup_ratio=0.067,
warmup_ratio=0.001,
step=[88, 96])
total_epochs = 100
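For reference, a rough sketch of what the layer-decay options above imply, assuming a BEiT/ViTDet-style rule where the lr multiplier shrinks geometrically from the last transformer block toward the patch embedding (EasyCV's LayerDecayOptimizerConstructor may index layers slightly differently):

num_layers = 12
layer_decay_rate = 0.7
base_lr = 1e-4
for layer_id in range(num_layers + 1):
    # layer_id 0 ~ patch embedding, layer_id 12 ~ last transformer block
    scale = layer_decay_rate ** (num_layers - layer_id)
    print(f'layer {layer_id}: lr = {base_lr * scale:.2e}')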

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ee64c0caef841c61c7e6344b7fe2c07a38fba07a8de81ff38c0686c641e0a283
size 190356
oid sha256:c696a58a2963b5ac47317751f04ff45bfed4723f2f70bacf91eac711f9710e54
size 189432

View File

@ -156,7 +156,7 @@ easycv.models.backbones.swin\_transformer\_dynamic module
easycv.models.backbones.vit\_transfomer\_dynamic module
-------------------------------------------------------
.. automodule:: easycv.models.backbones.vit_transfomer_dynamic
.. automodule:: easycv.models.backbones.vit_transformer_dynamic
:members:
:undoc-members:
:show-inheritance:

View File

@ -21,6 +21,9 @@
| hrnetw64 | [hrnetw64](https://github.com/alibaba/EasyCV/tree/master/configs/classification/imagenet/hrnet/imagenet_hrnetw64_jpg.py) | 79.884 | 95.04 | 5120 | 54.74 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/classification/resnet/hrnetw64/epoch_100.pth) |
| vit-base-patch16 | [vit-base-patch16](https://github.com/alibaba/EasyCV/tree/master/configs/classification/imagenet/vit/imagenet_vit_base_patch16_224_jpg.py) | 76.082 | 92.026 | 346 | 8.03 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/classification/vit/vit-base-patch16/epoch_300.pth) |
| swin-tiny-patch4-window7 | [swin-tiny-patch4-window7](https://github.com/alibaba/EasyCV/tree/master/configs/classification/imagenet/swint/imagenet_swin_tiny_patch4_window7_224_jpg.py) | 80.528 | 94.822 | 132 | 12.94 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/classification/swint/swin-tiny-patch4-window7/epoch_300.pth) |
| deitiii-small-patch16-224 | [deitiii-small-patch16-224](https://github.com/alibaba/EasyCV/tree/master/configs/classification/imagenet/vit/imagenet_deitiii_small_patch16_224_jpg.py) | 81.408 | 95.388 | 89 | 4.53 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/classification/deitiii/imagenet_deitiii_small_patch16_224/deitiii_small.pth) |
| deitiii-base-patch16-192 | [deitiii-base-patch16-192](https://github.com/alibaba/EasyCV/tree/master/configs/classification/imagenet/vit/imagenet_deitiii_base_patch16_192_jpg.py) | 82.982 | 95.95 | 337 | 4.63 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/classification/deitiii/imagenet_deitiii_base_patch16_192/deitiii_base.pth) |
| deitiii-large-patch16-192 | [deitiii-large-patch16-192](https://github.com/alibaba/EasyCV/tree/master/configs/classification/imagenet/vit/imagenet_deitiii_large_patch16_192_jpg.py) | 83.902 | 96.296 | 1170 | 10.17 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/classification/deitiii/imagenet_deitiii_large_patch16_192/deitiii_large.pth) |
(ps: For models trained with EasyCV, inference results are measured with a default input size of 224 on a V100 16G by default; the gpu memory column records gpu peak memory.)

View File

@ -6,38 +6,37 @@ Inference default use V100 16G.
Pretrained on COCO2017 dataset. (Results are optimized with PAI-Blade, and the reported speed covers model inference time only. For end-to-end inference time, refer to [export.md](./tutorials/export.md).)
| Algorithm | Config | Params | Speed<sup>V100<br/><sub>fp16 b32 </sub> | mAP<sup>val<br/><sub>0.5:0.95</sub> | AP<sup>val<br/><sub>50</sub> | Download |
|-----------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------|--------|-----------------------------------------|-------------------------------------|------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| YOLOX-s | [yolox_s_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_s_8xb16_300e_coco.py) | 9M | 0.68ms | 40.0 | 58.9 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_s_bs16_lr002/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_s_bs16_lr002/log.txt) |
| PAI-YOLOXs | [yoloxs_pai_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/pai_yoloxs_8xb16_300e_coco.py) | 16M | 0.71ms | 41.4 | 60.0 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox-pai/model/pai_yoloxs.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox-pai/log/pai_yoloxs.json) |
| PAI-YOLOXs-ASFF | [yoloxs_pai_asff_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/pai_yoloxs_asff_8xb16_300e_coco.py) | 21M | 0.87ms | 42.8 | 61.8 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox-pai/model/pai_yoloxs_asff.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox-pai/log/pai_yoloxs_asff.json) |
| Algorithm | Config | Params | Speed<sup>V100<br/><sub>fp16 b32 </sub> | mAP<sup>val<br/><sub>0.5:0.95</sub> | AP<sup>val<br/><sub>50</sub> | Download |
| --------------------- | ------------------------------------------------------------ | ------ | --------------------------------------- | ----------------------------------- | ---------------------------- | ------------------------------------------------------------ |
| YOLOX-s | [yolox_s_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_s_8xb16_300e_coco.py) | 9M | 0.68ms | 40.0 | 58.9 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_s_bs16_lr002/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_s_bs16_lr002/log.txt) |
| PAI-YOLOXs | [yoloxs_pai_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/pai_yoloxs_8xb16_300e_coco.py) | 16M | 0.71ms | 41.4 | 60.0 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox-pai/model/pai_yoloxs.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox-pai/log/pai_yoloxs.json) |
| PAI-YOLOXs-ASFF | [yoloxs_pai_asff_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/pai_yoloxs_asff_8xb16_300e_coco.py) | 21M | 0.87ms | 42.8 | 61.8 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox-pai/model/pai_yoloxs_asff.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox-pai/log/pai_yoloxs_asff.json) |
| PAI-YOLOXs-ASFF-TOOD3 | [yoloxs_pai_asff_tood3_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/pai_yoloxs_asff_tood3_8xb16_300e_coco.py) | 24M | 1.15ms | 43.9 | 62.1 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox-pai/model/pai_yoloxs_asff_tood3.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox-pai/log/pai_yoloxs_asff_tood3.json) |
| YOLOX-m | [yolox_m_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_m_8xb16_300e_coco.py) | 25M | 1.52ms | 46.3 | 64.9 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_m_bs16_lr002/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_m_bs16_lr002/log.txt) |
| YOLOX-l | [yolox_l_8xb8_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_m_8xb8_300e_coco.py) | 54M | 2.47ms | 48.9 | 67.5 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_l_bs8_lr001/epoch_290.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_l_bs8_lr001/log.txt) |
| YOLOX-x | [yolox_x_8xb8_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_x_8xb8_300e_coco.py) | 99M | 4.74ms | 50.9 | 69.2 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_x_bs8_lr001/epoch_290.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_x_bs8_lr001/log.txt) |
| YOLOX-tiny | [yolox_tiny_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_tiny_8xb16_300e_coco.py) | 5M | 0.28ms | 31.5 | 49.2 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_tiny_bs16_lr002/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_tiny_bs16_lr002/log.txt) |
| YOLOX-nano | [yolox_nano_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_tiny_8xb16_300e_coco.py) | 2.2M | 0.19ms | 26.5 | 42.6 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_nano_bs16_lr002/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_nano_bs16_lr002/log.txt) |
| YOLOX-m | [yolox_m_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_m_8xb16_300e_coco.py) | 25M | 1.52ms | 46.3 | 64.9 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_m_bs16_lr002/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_m_bs16_lr002/log.txt) |
| YOLOX-l | [yolox_l_8xb8_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_m_8xb8_300e_coco.py) | 54M | 2.47ms | 48.9 | 67.5 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_l_bs8_lr001/epoch_290.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_l_bs8_lr001/log.txt) |
| YOLOX-x | [yolox_x_8xb8_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_x_8xb8_300e_coco.py) | 99M | 4.74ms | 50.9 | 69.2 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_x_bs8_lr001/epoch_290.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_x_bs8_lr001/log.txt) |
| YOLOX-tiny | [yolox_tiny_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_tiny_8xb16_300e_coco.py) | 5M | 0.28ms | 31.5 | 49.2 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_tiny_bs16_lr002/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_tiny_bs16_lr002/log.txt) |
| YOLOX-nano | [yolox_nano_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_tiny_8xb16_300e_coco.py) | 2.2M | 0.19ms | 26.5 | 42.6 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_nano_bs16_lr002/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_nano_bs16_lr002/log.txt) |
## ViTDet
| Algorithm | Config | Params<br/>(backbone/total) | inference time(V100)<br/>(ms/img) | bbox_mAP<sup>val<br/><sub>0.5:0.95</sub> | mask_mAP<sup>val<br/><sub>0.5:0.95</sub> | Download |
| ---------- | ------------------------------------------------------------ | ------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| ViTDet_MaskRCNN | [vitdet_maskrcnn](https://github.com/alibaba/EasyCV/tree/master/configs/detection/vitdet/vitdet_100e.py) | 88M/118M | 163ms | 50.57 | 44.96 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/vitdet/vit_base/vitdet_maskrcnn.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/vitdet/vit_base/vitdet_maskrcnn.log.json) |
| Algorithm | Config | Params<br/>(backbone/total) | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | bbox_mAP<sup>val<br/><sub>0.5:0.95</sub> | mask_mAP<sup>val<br/><sub>0.5:0.95</sub> | Download |
| ---------- | ------------------------------------------------------------ | ------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| ViTDet_MaskRCNN | [vitdet_maskrcnn](https://github.com/alibaba/EasyCV/tree/master/configs/detection/vitdet/vitdet_mask_rcnn_100e.py) | 86M/111M | 13.3 (fp16) | 138ms | 50.65 | 45.41 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/vitdet/vit_base/epoch_100.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/vitdet/vit_base/20220901_135827.log.json) |
## FCOS
| Algorithm | Config | Params<br/>(backbone/total) | inference time(V100)<br/>(ms/img) | mAP<sup>val<br/><sub>0.5:0.95</sub> | AP<sup>val<br/><sub>50</sub> | Download |
| ---------- | ------------------------------------------------------------ | ------------------------ | --------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| FCOS-r50(caffe) | [fcos-r50](https://github.com/alibaba/EasyCV/tree/master/configs/detection/fcos/fcos_r50_caffe_1x_coco.py) | 23M/32M | 85.8ms | 38.58 | 57.18 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/fcos/epoch_12.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/fcos/20220621_121315.log.json) |
| FCOS-r50(torch) | [fcos-r50](https://github.com/alibaba/EasyCV/tree/master/configs/detection/fcos/fcos_r50_torch_1x_coco.py) | 23M/32M | 105.3ms | 38.88 | 58.01 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/fcos/fcos_epoch_12.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/fcos/20220826_182628.log.json) |
| Algorithm | Config | Params<br/>(backbone/total) | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | mAP<sup>val<br/><sub>0.5:0.95</sub> | AP<sup>val<br/><sub>50</sub> | Download |
| ---------- | ------------------------------------------------------------ | ------------------------ | --------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| FCOS-r50(caffe) | [fcos-r50](https://github.com/alibaba/EasyCV/tree/master/configs/detection/fcos/fcos_r50_caffe_1x_coco.py) | 23M/32M | 5.0 | 85.8ms | 38.58 | 57.18 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/fcos/epoch_12.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/fcos/20220621_121315.log.json) |
| FCOS-r50(torch) | [fcos-r50](https://github.com/alibaba/EasyCV/tree/master/configs/detection/fcos/fcos_r50_torch_1x_coco.py) | 23M/32M | 4.0 (fp16) | 105.3ms | 38.88 | 58.01 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/fcos/fcos_epoch_12.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/fcos/20220826_182628.log.json) |
## DETR
| Algorithm | Config | Params<br/>(backbone/total) | inference time(V100)<br/>(ms/img) | bbox_mAP<sup>val<br/><sub>0.5:0.95</sub> | AP<sup>val<br/><sub>50</sub> | Download |
| ---------- | ------------------------------------------------------------ | ------------------------ | --------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| DETR-r50 | [detr-r50](https://github.com/alibaba/EasyCV/tree/master/configs/detection/detr/detr_r50_8x2_150e_coco.py) | 23M/41M | 48.5ms | 39.92 | 60.52 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/detr/epoch_150.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/detr/20220609_101243.log.json) |
| DAB-DETR-r50 | [dab-detr-r50](https://github.com/alibaba/EasyCV/tree/master/configs/detection/dab_detr/dab_detr_r50_8x2_50e_coco.py) | 23M/43M | 58.5ms | 42.52 | 63.03 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/dab_detr/dab_detr_epoch_50.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/dab_detr/20220610_122811.log.json) |
| DN-DETR-r50 | [dab-detr-r50](https://github.com/alibaba/EasyCV/tree/master/configs/detection/dab_detr/dn_detr_r50_8x2_50e_coco.py) | 23M/43M | 58.5ms | 44.39 | 64.66 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/dn_detr/dn_detr_epoch_50.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/dn_detr/20220713_105127.log.json) |
| Algorithm | Config | Params<br/>(backbone/total) | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | bbox_mAP<sup>val<br/><sub>0.5:0.95</sub> | AP<sup>val<br/><sub>50</sub> | Download |
| ---------- | ------------------------------------------------------------ | ------------------------ | --------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| DETR-r50 | [detr-r50](https://github.com/alibaba/EasyCV/tree/master/configs/detection/detr/detr_r50_8x2_150e_coco.py) | 23M/41M | 8.5 | 48.5ms | 39.92 | 60.52 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/detr/epoch_150.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/detr/20220609_101243.log.json) |
| DAB-DETR-r50 | [dab-detr-r50](https://github.com/alibaba/EasyCV/tree/master/configs/detection/dab_detr/dab_detr_r50_8x2_50e_coco.py) | 23M/43M | 2.6 | 58.5ms | 42.52 | 63.03 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/dab_detr/dab_detr_epoch_50.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/dab_detr/20220610_122811.log.json) |
| DN-DETR-r50 | [dab-detr-r50](https://github.com/alibaba/EasyCV/tree/master/configs/detection/dab_detr/dn_detr_r50_8x2_50e_coco.py) | 23M/43M | 7.8 | 58.5ms | 44.39 | 64.66 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/dn_detr/dn_detr_epoch_50.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/dn_detr/20220713_105127.log.json) |
## DINO

View File

@ -4,29 +4,29 @@
Pretrained on **Pascal VOC 2012 + Aug**.
| Algorithm | Config | Params<br/>(backbone/total) | inference time(V100)<br/>(ms/img) | mIoU | Download |
| ---------- | ------------------------------------------------------------ | ------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| fcn_r50_d8 | [fcn_r50-d8_512x512_8xb4_60e_voc12aug](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/fcn/fcn_r50-d8_512x512_8xb4_60e_voc12aug.py) | 23M/49M | 166ms | 69.01 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/fcn_r50/epoch_60.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/fcn_r50/20220525_203606.log.json) |
| Algorithm | Config | Params<br/>(backbone/total) | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | mIoU | Download |
| ---------- | ------------------------------------------------------------ | ------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| fcn_r50_d8 | [fcn_r50-d8_512x512_8xb4_60e_voc12aug](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/fcn/fcn_r50-d8_512x512_8xb4_60e_voc12aug.py) | 23M/49M | 19.8 | 166ms | 69.01 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/fcn_r50/epoch_60.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/fcn_r50/20220525_203606.log.json) |
## UperNet
Pretrained on **Pascal VOC 2012 + Aug**.
| Algorithm | Config | Params<br/>(backbone/total) | inference time(V100)<br/>(ms/img) | mIoU | Download |
| ---------- | ------------------------------------------------------------ | ------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| upernet_r50 | [upernet_r50_512x512_8xb4_60e_voc12aug](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/upernet/upernet_r50_512x512_8xb4_60e_voc12aug.py) | 23M/66M | 282.9ms | 76.59 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/upernet_r50/epoch_60.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/upernet_r50/20220706_114712.log.json) |
| Algorithm | Config | Params<br/>(backbone/total) | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | mIoU | Download |
| ---------- | ------------------------------------------------------------ | ------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| upernet_r50 | [upernet_r50_512x512_8xb4_60e_voc12aug](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/upernet/upernet_r50_512x512_8xb4_60e_voc12aug.py) | 23M/66M | 5.5 | 282.9ms | 76.59 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/upernet_r50/epoch_60.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/upernet_r50/20220706_114712.log.json) |
## Mask2former
### Instance Segmentation on COCO
| Algorithm | Config | box MAP | Mask mAP | Download |
| ---------- | ------------------------------------------------------------ | ------------------------ |----------|---------------------------------------------------------------------------- |
| mask2former_r50 | [mask2former_r50_8xb2_e50_instance](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/mask2former/mask2former_r50_8xb2_e50_instance.py) | 46.09 | 43.26 |[model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/mask2former_r50_instance/epoch_50.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/mask2former_r50_instance/20220620_113639.log.json) |
| Algorithm | Config | Train memory<br/>(GB) | box MAP | Mask mAP | Download |
| ---------- | ------------------------------------------------------------ |----------|----------|----------|----------|
| mask2former_r50 | [mask2former_r50_8xb2_e50_instance](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/mask2former/mask2former_r50_8xb2_e50_instance.py) | 18.8 | 46.09 | 43.26 |[model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/mask2former_r50_instance/epoch_50.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/mask2former_r50_instance/20220620_113639.log.json) |
### Panoptic Segmentation on COCO
| Algorithm | Config | PQ | box MAP | Mask mAP | Download |
| ---------- | ---------- | ------------------------------------------------------------ | ------------------------ |----------|---------------------------------------------------------------------------- |
| mask2former_r50 | [mask2former_r50_8xb2_e50_panopatic](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/mask2former/mask2former_r50_8xb2_e50_panopatic.py) | 51.64 | 44.81 | 41.88 |[model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/mask2former_r50_panoptic/epoch_50.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/mask2former_r50_panoptic/20220629_170721.log.json) |
| Algorithm | Config | Train memory<br/>(GB) | PQ | box MAP | Mask mAP | Download |
| ---------- | ---------- | ------------------------------------------------------------ | ------------------------ |----------|---------------------------------------------------------------------------- |---------------------------------------------------------------------------- |
| mask2former_r50 | [mask2former_r50_8xb2_e50_panopatic](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/mask2former/mask2former_r50_8xb2_e50_panopatic.py) | 18.8 | 51.64 | 44.81 | 41.88 |[model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/mask2former_r50_panoptic/epoch_50.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/mask2former_r50_panoptic/20220629_170721.log.json) |
## SegFormer

View File

@ -5,19 +5,19 @@
Pretrained on **ImageNet** dataset.
| Config | Backbone | Params<br>(backbone/total) | Flops | inference time(V100)<br>(ms/img) | Epochs | Download |
| ------------------------------------------------------------ | -------- | -------------------------- | ----- | -------------------------------- | ------ | ------------------------------------------------------------ |
| [mae_vit_base_patch16_8xb64_400e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/mae/mae_vit_base_patch16_8xb64_400e.py) | ViT-B/16 | 85M/111M | 9.8G | 8.03 | 400 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-b-400/pretrain_400.pth) |
| [mae_vit_base_patch16_8xb64_1600e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/mae/mae_vit_base_patch16_8xb64_1600e.py) | ViT-B/16 | 85M/111M | 9.8G | 8.03 | 1600 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-b-1600/pretrain_1600.pth) |
| [mae_vit_large_patch16_8xb32_1600e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/mae/mae_vit_large_patch16_8xb32_1600e.py) | ViT-L/16 | 303M/329M | 20.8G | 16.30 | 1600 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-l-1600/pretrain_1600.pth) |
| Config | Backbone | Params<br>(backbone/total) | Train memory<br/>(GB) | Flops | inference time(V100)<br>(ms/img) | Epochs | Download |
| ------------------------------------------------------------ | -------- | -------------------------- | ------------------ | ----- | -------------------------------- | ------ | ------------------------------------------------------------ |
| [mae_vit_base_patch16_8xb64_400e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/mae/mae_vit_base_patch16_8xb64_400e.py) | ViT-B/16 | 85M/111M | 9.5 | 9.8G | 8.03 | 400 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-b-400/pretrain_400.pth) |
| [mae_vit_base_patch16_8xb64_1600e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/mae/mae_vit_base_patch16_8xb64_1600e.py) | ViT-B/16 | 85M/111M | 9.5 | 9.8G | 8.03 | 1600 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-b-1600/pretrain_1600.pth) |
| [mae_vit_large_patch16_8xb32_1600e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/mae/mae_vit_large_patch16_8xb32_1600e.py) | ViT-L/16 | 303M/329M | 11.3 | 20.8G | 16.30 | 1600 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-l-1600/pretrain_1600.pth) |
### Fast ConvMAE
Pretrained on **ImageNet** dataset.
| Config | Backbone | Params<br/>(backbone/total) | Flops | inference time(V100)<br/>(ms/img) | Epochs | Download |
| ------------------------------------------------------------ | --------------- | --------------------------- | ----- | --------------------------------- | ------ | ------------------------------------------------------------ |
| [fast_convmae_vit_base_patch16_8xb64_50e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/fast_convmae/fast_convmae_vit_base_patch16_8xb64_50e.py) | ConvViT-B/16 | 88M/115M | 45.1G | 6.88 | 50 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/FastConvMAE/pretrained/epoch_50.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/FastConvMAE/pretrained/20220617_110501.log.json) |
| Config | Backbone | Params<br/>(backbone/total) | Train memory<br/>(GB) | Flops | inference time(V100)<br/>(ms/img) | Total train time | Epochs | Download |
| ------------------------------------------------------------ | --------------- | --------------------------- | ----- | --------------------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| [fast_convmae_vit_base_patch16_8xb64_50e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/fast_convmae/fast_convmae_vit_base_patch16_8xb64_50e.py) | ConvViT-B/16 | 88M/115M | 30.3 | 45.1G | 6.88 | 20h<br/>(8*A100) | 50 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/FastConvMAE/pretrained/epoch_50.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/FastConvMAE/pretrained/20220617_110501.log.json) |
> The flops of Fast ConvMAE are about four times those of MAE: MAE keeps only 25% of the tokens in each forward pass, whereas Fast ConvMAE uses a complementary masking strategy that splits the mask into four disjoint parts of 25% of the tokens each. One forward pass therefore effectively learns from four samples, achieving roughly 4x the learning effect.
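A toy illustration of the complementary masking idea (an assumption-laden sketch, not EasyCV's implementation): one random permutation of the tokens is split into four disjoint 25% groups, so the four forward passes together cover every token exactly once.

import torch

def complementary_masks(num_tokens=196, num_parts=4):
    # one random permutation, split into num_parts disjoint keep-masks
    perm = torch.randperm(num_tokens)
    masks = []
    for idx in perm.chunk(num_parts):
        keep = torch.zeros(num_tokens, dtype=torch.bool)
        keep[idx] = True
        masks.append(keep)
    return masks

masks = complementary_masks()
# every token is kept by exactly one of the four masks
assert torch.stack(masks).sum(dim=0).eq(1).all()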
@ -25,34 +25,34 @@ Pretrained on **ImageNet** dataset.
Pretrained on **ImageNet** dataset.
| Config | Backbone | Params<br/>(backbone/total) | inference time(V100)<br/>(ms/img) | Epochs | Download |
| ------------------------------------------------------------ | --------- | --------------------------- | --------------------------------- | ------ | ------------------------------------------------------------ |
| [dino_deit_small_p16_8xb32_100e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/dino/dino_deit_small_p16_8xb32_100e_tfrecord.py) | DeiT-S/16 | 21M/88M | 6.17 | 100 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/dino_deit_small/epoch_100.pth) |
| Config | Backbone | Params<br/>(backbone/total) | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | Epochs | Download |
| ------------------------------------------------------------ | --------- | --------------------------- | ------------------ | --------------------------------- | ------ | ------------------------------------------------------------ |
| [dino_deit_small_p16_8xb32_100e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/dino/dino_deit_small_p16_8xb32_100e_tfrecord.py) | DeiT-S/16 | 21M/88M | 10.5 | 6.17 | 100 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/dino_deit_small/epoch_100.pth) |
### MoBY
Pretrained on **ImageNet** dataset.
| Config | Backbone | Params<br/>(backbone/total) | Flops | inference time(V100)<br/>(ms/img) | Epochs | Download |
| ------------------------------------------------------------ | --------- | --------------------------- | ----- | --------------------------------- | ------ | ------------------------------------------------------------ |
| [moby_deit_small_p16_4xb128_300e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/moby/moby_deit_small_p16_4xb128_300e_tfrecord.py) | DeiT-S/16 | 21M/26M | 18.6G | 6.17 | 300 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/moby_deit_small_p16/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/moby_deit_small_p16/log.txt) |
| [moby_swin_tiny_8xb64_300e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/moby/moby_dynamic_swin_tiny_8xb64_300e_tfrecord.py) | Swin-T | 27M/33M | 18.1G | 9.74 | 300 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/moby_dynamic_swin_tiny/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/moby_dynamic_swin_tiny/log.txt) |
| Config | Backbone | Params<br/>(backbone/total) | Flops | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | Epochs | Download |
| ------------------------------------------------------------ | --------- | --------------------------- | ----- | ------------------ | --------------------------------- | ------ | ------------------------------------------------------------ |
| [moby_deit_small_p16_4xb128_300e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/moby/moby_deit_small_p16_4xb128_300e_tfrecord.py) | DeiT-S/16 | 21M/26M | 18.6G | 21.4 | 6.17 | 300 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/moby_deit_small_p16/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/moby_deit_small_p16/log.txt) |
| [moby_swin_tiny_8xb64_300e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/moby/moby_dynamic_swin_tiny_8xb64_300e_tfrecord.py) | Swin-T | 27M/33M | 18.1G | 16.1 | 9.74 | 300 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/moby_dynamic_swin_tiny/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/moby_dynamic_swin_tiny/log.txt) |
### MoCo V2
Pretrained on **ImageNet** dataset.
| Config | Backbone | Params<br/>(backbone/total) | Flops | inference time(V100)<br/>(ms/img) | Epochs | Download |
| ------------------------------------------------------------ | -------- | --------------------------- | ----- | --------------------------------- | ------ | ------------------------------------------------------------ |
| [mocov2_resnet50_8xb32_200e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/mocov2/mocov2_rn50_8xb32_200e_tfrecord.py) | ResNet50 | 23M/28M | 8.2G | 8.59 | 200 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mocov2_r50/epoch_200.pth) |
| Config | Backbone | Params<br/>(backbone/total) | Flops | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | Epochs | Download |
| ------------------------------------------------------------ | -------- | --------------------------- | ----- | ------------------ | --------------------------------- | ------ | ------------------------------------------------------------ |
| [mocov2_resnet50_8xb32_200e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/mocov2/mocov2_rn50_8xb32_200e_tfrecord.py) | ResNet50 | 23M/28M | 8.2G | 5.4 | 8.59 | 200 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mocov2_r50/epoch_200.pth) |
### SwAV
Pretrained on **ImageNet** dataset.
| Config | Backbone | Params<br/>(backbone/total) | Flops | inference time(V100)<br/>(ms/img) | Epochs | Download |
| ------------------------------------------------------------ | -------- | --------------------------- | ----- | --------------------------------- | ------ | ------------------------------------------------------------ |
| [swav_resnet50_8xb32_200e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/swav/swav_rn50_8xb32_200e_tfrecord.py) | ResNet50 | 23M/28M | 12.9G | 8.59 | 200 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/swav_r50/epoch_200.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/swav_r50/log.txt) |
| Config | Backbone | Params<br/>(backbone/total) | Flops | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | Epochs | Download |
| ------------------------------------------------------------ | -------- | --------------------------- | ----- | ------------------ | --------------------------------- | ------ | ------------------------------------------------------------ |
| [swav_resnet50_8xb32_200e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/swav/swav_rn50_8xb32_200e_tfrecord.py) | ResNet50 | 23M/28M | 12.9G | 11.3 | 8.59 | 200 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/swav_r50/epoch_200.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/swav_r50/log.txt) |
## Benchmarks

View File

@ -262,7 +262,7 @@
"state_dict = torch.load(weight_path)['state_dict']\n",
"state_dict_out = {}\n",
"for key in state_dict:\n",
" state_dict_out[key.replace('encoder.','')] = state_dict[key]\n",
" state_dict_out['model.' + key.replace('encoder.','')] = state_dict[key]\n",
"torch.save(state_dict_out,weight_path)"
]
},
@ -324,7 +324,7 @@
"outputs": [],
"source": [
"!python -m torch.distributed.launch --nproc_per_node=1 --master_port=29930 \\\n",
"/home/pai/lib/python3.6/site-packages/easycv/tools/train.py mae_vit_base_patch16_8xb64_100e_lrdecay065_fintune.py --work_dir work_dir/selfsup/jpg/mae --launcher pytorch"
"/home/pai/lib/python3.6/site-packages/easycv/tools/train.py mae_vit_base_patch16_8xb64_100e_lrdecay065_fintune.py --work_dir work_dir/selfsup/jpg/mae_fintune --launcher pytorch"
]
},
{
@ -333,7 +333,56 @@
"metadata": {},
"source": [
"### 预测\n",
"参考EasyCV图像分类的demo对训练好的模型导出并预测"
"对训练好的模型导出并预测"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4271c852",
"metadata": {},
"outputs": [],
"source": [
"! python -m easycv.tools.export mae_vit_base_patch16_8xb64_100e_lrdecay065_fintune.py work_dir/selfsup/jpg/mae_fintune/ClsEvaluator_neck_top1_best.pth work_dir/selfsup/jpg/mae_fintune/best_export.pth"
]
},
{
"cell_type": "markdown",
"id": "2cc9e6fc",
"metadata": {},
"source": [
"下载测试图片和标签文件"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "973d5bd4",
"metadata": {},
"outputs": [],
"source": [
"! wget http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/cifar10/qince_data/predict/aeroplane_s_000004.png\n",
"! wget http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/release/doc/easycv/configs/selfsup/mae/label_map.txt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5a5a3632",
"metadata": {},
"outputs": [],
"source": [
"import cv2\n",
"from easycv.predictors.classifier import TorchClassifier\n",
"\n",
"output_ckpt = 'work_dir/selfsup/jpg/mae_fintune/best_export.pth'\n",
"tcls = TorchClassifier(output_ckpt, topk=1, label_map_path='label_map.txt')\n",
"\n",
"img = cv2.imread('aeroplane_s_000004.png')\n",
"# input image should be RGB order\n",
"img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n",
"output = tcls.predict([img])\n",
"print(output)"
]
}
],

View File

@ -13,6 +13,7 @@ import torchvision.transforms.functional as t_f
from mmcv.utils import Config
from easycv.file import io
from easycv.framework.errors import ValueError
from easycv.models import (DINO, MOCO, SWAV, YOLOX, Classification, MoBY,
build_model)
from easycv.utils.checkpoint import load_checkpoint
@ -106,6 +107,9 @@ def _export_cls(model, cfg, filename):
backbone=replace_syncbn(cfg.model.backbone),
)
# avoid load pretrained model
model_config['pretrained'] = False
if export_neck:
if hasattr(cfg.model, 'neck'):
model_config['neck'] = cfg.model.neck

View File

@ -15,6 +15,7 @@ from mmcv.parallel import (MMDataParallel, MMDistributedDataParallel,
from mmcv.runner import get_dist_info
from easycv.file import io
from easycv.framework.errors import ValueError
from easycv.utils.torchacc_util import is_torchacc_enabled

View File

@ -31,6 +31,7 @@ from easycv.core import standard_fields
from easycv.core.evaluation import coco_tools
from easycv.core.post_processing.nms import oks_nms, soft_oks_nms
from easycv.core.standard_fields import DetectionResultFields, InputDataFields
from easycv.framework.errors import KeyError, TypeError, ValueError
from easycv.utils.json_utils import MyEncoder
from .base_evaluator import Evaluator
from .builder import EVALUATORS
@ -365,7 +366,7 @@ class CocoDetectionEvaluator(Evaluator):
def _check_mask_type_and_value(array_name, masks):
"""Checks whether mask dtype is uint8 and the values are either 0 or 1."""
if masks.dtype != np.uint8:
raise ValueError('{} must be of type np.uint8. Found {}.'.format(
raise TypeError('{} must be of type np.uint8. Found {}.'.format(
array_name, masks.dtype))
if np.any(np.logical_and(masks != 0, masks != 1)):
raise ValueError(

View File

@ -3,6 +3,7 @@
# https://github.com/open-mmlab/mmpose/blob/master/mmpose/datasets/datasets/base/kpt_2d_sview_rgb_img_top_down_dataset.py
import numpy as np
from easycv.framework.errors import KeyError
from .base_evaluator import Evaluator
from .builder import EVALUATORS
from .metric_registry import METRICS

View File

@ -1,6 +1,8 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import inspect
from easycv.framework.errors import KeyError, TypeError
class MetricRegistry(object):

View File

@ -5,6 +5,7 @@ import numpy as np
import torch
from prettytable import PrettyTable
from easycv.framework.errors import KeyError
from easycv.utils.logger import print_log
from .base_evaluator import Evaluator
from .builder import EVALUATORS

View File

@ -6,6 +6,7 @@ import cv2
import numpy as np
from easycv.core.post_processing import transform_preds
from easycv.framework.errors import ValueError
def _calc_distances(preds, targets, mask, normalize):

View File

@ -4,6 +4,7 @@ import torch
from torch.optim import *
from .builder import build_optimizer_constructor
from .lamb import Lamb
from .lars import LARS
from .layer_decay_optimizer_constructor import LayerDecayOptimizerConstructor
from .ranger import Ranger

View File

@ -8,6 +8,8 @@ from mmcv.runner.optimizer.builder import OPTIMIZERS
from torch import Tensor
from torch.optim import AdamW as _AdamW
from easycv.framework.errors import RuntimeError
def adamw(params: List[Tensor], grads: List[Tensor], exp_avgs: List[Tensor],
exp_avg_sqs: List[Tensor], max_exp_avg_sqs: List[Tensor],

View File

@ -0,0 +1,168 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import math
import torch
from mmcv.runner import OPTIMIZERS
from torch.optim import Optimizer
from easycv.framework.errors import RuntimeError
@OPTIMIZERS.register_module()
class Lamb(Optimizer):
"""A pure pytorch variant of FuseLAMB (NvLamb variant) optimizer.
This class is copied from `timm`_. The LAMB was proposed in `Large Batch
Optimization for Deep Learning - Training BERT in 76 minutes`_.
.. _timm:
https://github.com/rwightman/pytorch-image-models/blob/master/timm/optim/lamb.py
.. _Large Batch Optimization for Deep Learning - Training BERT in 76 minutes:
https://arxiv.org/abs/1904.00962
Arguments:
params (iterable): iterable of parameters to optimize or dicts defining
parameter groups.
lr (float, optional): learning rate. (default: 1e-3)
betas (Tuple[float, float], optional): coefficients used for computing
running averages of gradient and its norm. (default: (0.9, 0.999))
eps (float, optional): term added to the denominator to improve
numerical stability. (default: 1e-8)
weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
grad_averaging (bool, optional): whether apply (1-beta2) to grad when
calculating running averages of gradient. (default: True)
max_grad_norm (float, optional): value used to clip global grad norm
(default: 1.0)
trust_clip (bool): enable LAMBC trust ratio clipping (default: False)
always_adapt (boolean, optional): Apply adaptive learning rate to 0.0
weight decay parameter (default: False)
""" # noqa: E501
def __init__(self,
params,
lr=1e-3,
bias_correction=True,
betas=(0.9, 0.999),
eps=1e-6,
weight_decay=0.01,
grad_averaging=True,
max_grad_norm=1.0,
trust_clip=False,
always_adapt=False):
defaults = dict(
lr=lr,
bias_correction=bias_correction,
betas=betas,
eps=eps,
weight_decay=weight_decay,
grad_averaging=grad_averaging,
max_grad_norm=max_grad_norm,
trust_clip=trust_clip,
always_adapt=always_adapt)
super().__init__(params, defaults)
@torch.no_grad()
def step(self, closure=None):
"""Performs a single optimization step.
Arguments:
closure (callable, optional): A closure that reevaluates the model
and returns the loss.
"""
loss = None
if closure is not None:
with torch.enable_grad():
loss = closure()
device = self.param_groups[0]['params'][0].device
one_tensor = torch.tensor(
1.0, device=device
) # because torch.where doesn't handle scalars correctly
global_grad_norm = torch.zeros(1, device=device)
for group in self.param_groups:
for p in group['params']:
if p.grad is None:
continue
grad = p.grad
if grad.is_sparse:
raise RuntimeError(
'Lamb does not support sparse gradients, consider '
'SparseAdam instead.')
global_grad_norm.add_(grad.pow(2).sum())
global_grad_norm = torch.sqrt(global_grad_norm)
# FIXME it'd be nice to remove explicit tensor conversion of scalars
# when torch.where promotes
# scalar types properly https://github.com/pytorch/pytorch/issues/9190
max_grad_norm = torch.tensor(
self.defaults['max_grad_norm'], device=device)
clip_global_grad_norm = torch.where(global_grad_norm > max_grad_norm,
global_grad_norm / max_grad_norm,
one_tensor)
for group in self.param_groups:
bias_correction = 1 if group['bias_correction'] else 0
beta1, beta2 = group['betas']
grad_averaging = 1 if group['grad_averaging'] else 0
beta3 = 1 - beta1 if grad_averaging else 1.0
# assume same step across group now to simplify things
# per parameter step can be easily supported by making it a tensor, or
# pass list into kernel
if 'step' in group:
group['step'] += 1
else:
group['step'] = 1
if bias_correction:
bias_correction1 = 1 - beta1**group['step']
bias_correction2 = 1 - beta2**group['step']
else:
bias_correction1, bias_correction2 = 1.0, 1.0
for p in group['params']:
if p.grad is None:
continue
grad = p.grad.div_(clip_global_grad_norm)
state = self.state[p]
# State initialization
if len(state) == 0:
# Exponential moving average of gradient values
state['exp_avg'] = torch.zeros_like(p)
# Exponential moving average of squared gradient values
state['exp_avg_sq'] = torch.zeros_like(p)
exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
# Decay the first and second moment running average coefficient
exp_avg.mul_(beta1).add_(grad, alpha=beta3) # m_t
exp_avg_sq.mul_(beta2).addcmul_(
grad, grad, value=1 - beta2) # v_t
denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(
group['eps'])
update = (exp_avg / bias_correction1).div_(denom)
weight_decay = group['weight_decay']
if weight_decay != 0:
update.add_(p, alpha=weight_decay)
if weight_decay != 0 or group['always_adapt']:
# Layer-wise LR adaptation. By default, skip adaptation on
# parameters that are
# excluded from weight decay, unless always_adapt == True,
# then always enabled.
w_norm = p.norm(2.0)
g_norm = update.norm(2.0)
# FIXME nested where required since logical and/or not
# working in PT XLA
trust_ratio = torch.where(
w_norm > 0,
torch.where(g_norm > 0, w_norm / g_norm, one_tensor),
one_tensor,
)
if group['trust_clip']:
# LAMBC trust clipping, upper bound fixed at one
trust_ratio = torch.minimum(trust_ratio, one_tensor)
update.mul_(trust_ratio)
p.add_(update, alpha=-group['lr'])
return loss
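Since the class above is registered in mmcv's OPTIMIZERS registry, it can be selected directly from a config. A minimal sketch, assuming the usual mmcv optimizer config keys; the values are placeholders rather than a tuned recipe:

optimizer = dict(
    type='Lamb',          # resolved through the OPTIMIZERS registry
    lr=1e-3,
    betas=(0.9, 0.999),
    eps=1e-6,
    weight_decay=0.01,
    grad_averaging=True,
    max_grad_norm=1.0,
    trust_clip=False)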

View File

@ -3,6 +3,8 @@ import torch
from torch.optim import * # noqa: F401,F403
from torch.optim.optimizer import Optimizer, required
from easycv.framework.errors import ValueError
class LARS(Optimizer):
r"""Implements layer-wise adaptive rate scaling for SGD.

View File

@ -1,5 +1,3 @@
# Reference from https://github.com/ViTAE-Transformer/ViTDet/blob/main/mmcv_custom/layer_decay_optimizer_constructor.py
import json
from mmcv.runner import DefaultOptimizerConstructor, get_dist_info
@ -7,23 +5,32 @@ from mmcv.runner import DefaultOptimizerConstructor, get_dist_info
from .builder import OPTIMIZER_BUILDERS
def get_num_layer_for_vit(var_name, num_max_layer, layer_sep=None):
if var_name in ('backbone.cls_token', 'backbone.mask_token',
'backbone.pos_embed'):
return 0
elif var_name.startswith('backbone.patch_embed'):
return 0
elif var_name.startswith('backbone.blocks'):
layer_id = int(var_name.split('.')[2])
return layer_id + 1
else:
return num_max_layer - 1
def get_vit_lr_decay_rate(name, lr_decay_rate=1.0, num_layers=12):
"""
Calculate lr decay rate for different ViT blocks.
Reference from https://github.com/facebookresearch/detectron2/blob/main/detectron2/modeling/backbone/vit.py
Args:
name (string): parameter name.
lr_decay_rate (float): base lr decay rate.
num_layers (int): number of ViT blocks.
Returns:
lr decay rate for the given parameter.
"""
layer_id = num_layers + 1
if '.pos_embed' in name or '.patch_embed' in name:
layer_id = 0
elif '.blocks.' in name and '.residual.' not in name:
layer_id = int(name[name.find('.blocks.'):].split('.')[2]) + 1
scale = lr_decay_rate**(num_layers + 1 - layer_id)
return layer_id, scale
@OPTIMIZER_BUILDERS.register_module()
class LayerDecayOptimizerConstructor(DefaultOptimizerConstructor):
def add_params(self, params, module, prefix='', is_dcn_module=None):
def add_params(self, params, module):
"""Add all parameters of module to the params list.
The parameters of the given module will be added to the list of param
groups, with specific rules defined by paramwise_cfg.
@ -31,54 +38,41 @@ class LayerDecayOptimizerConstructor(DefaultOptimizerConstructor):
params (list[dict]): A list of param groups, it will be modified
in place.
module (nn.Module): The module to be added.
prefix (str): The prefix of the module
is_dcn_module (int|float|None): If the current module is a
submodule of DCN, `is_dcn_module` will be passed to
control conv_offset layer's learning rate. Defaults to None.
Reference from https://github.com/ViTAE-Transformer/ViTDet/blob/main/mmcv_custom/layer_decay_optimizer_constructor.py
Note: Currently, this optimizer constructor is built for ViTDet.
"""
# get param-wise options
parameter_groups = {}
print(self.paramwise_cfg)
num_layers = self.paramwise_cfg.get('num_layers') + 2
layer_sep = self.paramwise_cfg.get('layer_sep', None)
layer_decay_rate = self.paramwise_cfg.get('layer_decay_rate')
lr_decay_rate = self.paramwise_cfg.get('layer_decay_rate')
num_layers = self.paramwise_cfg.get('num_layers')
print('Build LayerDecayOptimizerConstructor %f - %d' %
(layer_decay_rate, num_layers))
(lr_decay_rate, num_layers))
lr = self.base_lr
weight_decay = self.base_wd
custom_keys = self.paramwise_cfg.get('custom_keys', {})
# first sort with alphabet order and then sort with reversed len of str
sorted_keys = sorted(custom_keys.keys())
for name, param in module.named_parameters():
if not param.requires_grad:
continue # frozen weights
if len(param.shape) == 1 or name.endswith('.bias') or (
'pos_embed' in name) or ('cls_token'
in name) or ('rel_pos_' in name):
if 'backbone' in name and ('.norm' in name or '.pos_embed' in name
or '.gn.' in name or '.ln.' in name):
group_name = 'no_decay'
this_weight_decay = 0.
else:
group_name = 'decay'
this_weight_decay = weight_decay
layer_id = get_num_layer_for_vit(name, num_layers, layer_sep)
if name.startswith('backbone'):
layer_id, scale = get_vit_lr_decay_rate(
name, lr_decay_rate=lr_decay_rate, num_layers=num_layers)
else:
layer_id, scale = -1, 1
group_name = 'layer_%d_%s' % (layer_id, group_name)
# if the parameter match one of the custom keys, ignore other rules
this_lr_multi = 1.
for key in sorted_keys:
if key in f'{name}':
lr_mult = custom_keys[key].get('lr_mult', 1.)
this_lr_multi = lr_mult
group_name = '%s_%s' % (group_name, key)
break
if group_name not in parameter_groups:
scale = layer_decay_rate**(num_layers - layer_id - 1)
parameter_groups[group_name] = {
'weight_decay': this_weight_decay,
@ -86,7 +80,7 @@ class LayerDecayOptimizerConstructor(DefaultOptimizerConstructor):
'param_names': [],
'lr_scale': scale,
'group_name': group_name,
'lr': scale * self.base_lr * this_lr_multi,
'lr': scale * lr,
}
parameter_groups[group_name]['params'].append(param)

View File

@ -4,6 +4,8 @@ import math
import torch
from torch.optim.optimizer import Optimizer
from easycv.framework.errors import ValueError
def centralized_gradient(x, use_gc=True, gc_conv_only=False):
'''credit - https://github.com/Yonghongwei/Gradient-Centralization '''

View File

@ -22,6 +22,7 @@ import torch
from easycv.core.sailfish.util import (BiasUniformInitializer,
KaimingUniformInitializer,
ModelParallel, RenormUniformInitializer)
from easycv.framework.errors import ValueError
class Linear(torch.nn.Module):

View File

@ -25,6 +25,7 @@ from easycv.core.sailfish.function import (all_cat, all_log_softmax,
shard_correct_predictions,
shard_target_and_mask,
shard_topk_correct_predictions)
from easycv.framework.errors import NotImplementedError, ValueError
class DistributedParallel:

View File

@ -10,6 +10,8 @@ import numpy as np
from mmcv.utils.misc import deprecated_api_warning
from PIL import Image, ImageDraw, ImageFont
from easycv.framework.errors import FileNotFoundError
def get_font_path():
root_path = opd(opd(opd(os.path.realpath(__file__))))
@ -22,8 +24,8 @@ def get_font_path():
elif os.path.exists(find_path_source):
return find_path_source
else:
raise ValueError('Not find font file both in %s and %s' %
(find_path_whl, find_path_source))
raise FileNotFoundError('Not find font file both in %s and %s' %
(find_path_whl, find_path_source))
_FONT_PATH = get_font_path()

View File

@ -7,6 +7,7 @@ from PIL import Image, ImageFile
from easycv.datasets.registry import DATASOURCES
from easycv.file import io
from easycv.framework.errors import TypeError
from easycv.utils.dist_utils import dist_zero_exec
from .utils import split_listfile_byrank
@ -54,8 +55,8 @@ class ClsSourceImageList(object):
'list_file should be str or list(str)'
root = [root] if isinstance(root, str) else root
if not isinstance(root, list):
raise ValueError('root must be str or list(str), but get %s' %
type(root))
raise TypeError('root must be str or list(str), but get %s' %
type(root))
if len(root) < len(list_file):
logging.warning(

View File

@ -3,6 +3,7 @@ from PIL import Image
from easycv.datasets.registry import DATASETS
from easycv.datasets.shared.base import BaseDataset
from easycv.framework.errors import NotImplementedError
@DATASETS.register_module

View File

@ -8,10 +8,11 @@ from typing import Sequence
import mmcv
import numpy as np
from PIL import Image
from PIL import Image, ImageFilter
from easycv.datasets.registry import PIPELINES
from easycv.datasets.shared.pipelines import Compose
from easycv.framework.errors import TypeError
# Default hyperparameters for all Ops
_HPARAMS_DEFAULT = dict(pad_val=128)
@ -1043,3 +1044,37 @@ class Cutout(object):
repr_str += f'pad_val={self.pad_val}, '
repr_str += f'prob={self.prob})'
return repr_str
@PIPELINES.register_module()
class PILGaussianBlur(object):
def __init__(self, prob=0.1, radius_min=0.1, radius_max=2.):
assert 0 <= prob <= 1.0, 'The prob should be in range [0,1], ' \
f'got {prob} instead.'
assert isinstance(radius_min, (int, float)), 'The radius_min type must '\
f'be int or float, but got {type(radius_min)} instead.'
assert isinstance(radius_max, (int, float)), 'The radius_max type must '\
f'be int or float, but got {type(radius_max)} instead.'
self.prob = prob
self.radius_min = radius_min
self.radius_max = radius_max
def __call__(self, results):
if np.random.rand() > self.prob:
return results
for key in results.get('img_fields', ['img']):
img = results[key].filter(
ImageFilter.GaussianBlur(
radius=random.uniform(self.radius_min, self.radius_max)))
results[key] = img
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += f'(prob={self.prob}, '
repr_str += f'radius_min={self.radius_min}, '
repr_str += f'radius_max={self.radius_max})'
return repr_str
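The new PILGaussianBlur transform operates on PIL images inside a pipeline. A minimal sketch of enabling it in a training pipeline; the surrounding entries are illustrative placeholders, not part of this commit:

train_pipeline = [
    dict(type='RandomResizedCrop', size=224),
    dict(type='PILGaussianBlur', prob=0.5, radius_min=0.1, radius_max=2.0),
    dict(type='ToTensor'),
    dict(type='Collect', keys=['img']),
]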

View File

@ -10,6 +10,7 @@ from mmcv.runner.dist_utils import get_dist_info
from tqdm import tqdm
from easycv.file.image import load_image
from easycv.framework.errors import NotImplementedError, ValueError
def _load_image(img_path):

View File

@ -4,6 +4,7 @@ from xtcocotools.coco import COCO
from easycv.datasets.registry import DATASOURCES, PIPELINES
from easycv.datasets.shared.pipelines import Compose
from easycv.framework.errors import TypeError
from easycv.utils.registry import build_from_cfg

View File

@ -8,6 +8,7 @@ from xtcocotools.coco import COCO
from easycv.datasets.detection.data_sources import DetSourceCoco
from easycv.datasets.registry import DATASOURCES, PIPELINES
from easycv.datasets.shared.pipelines import Compose
from easycv.framework.errors import RuntimeError, TypeError
from easycv.utils.registry import build_from_cfg
try:

View File

@ -8,6 +8,7 @@ import numpy as np
from easycv.datasets.detection.data_sources.base import DetSourceBase
from easycv.datasets.registry import DATASOURCES
from easycv.file import io
from easycv.framework.errors import NotImplementedError, ValueError
def get_prior_task_id(keys):

View File

@ -9,6 +9,7 @@ import numpy as np
import torch
from easycv.datasets.registry import DATASETS, PIPELINES
from easycv.framework.errors import TypeError
from easycv.utils.bbox_util import xyxy2xywh as xyxy2cxcywh
from easycv.utils.registry import build_from_cfg
from .raw import DetDataset

View File

@ -13,6 +13,7 @@ from torchvision.transforms import functional as F
from easycv.datasets.registry import PIPELINES
from easycv.datasets.shared.pipelines.transforms import Compose
from easycv.framework.errors import KeyError, NotImplementedError, TypeError
try:
from panopticapi.utils import rgb2id
@ -1122,8 +1123,8 @@ class MMRandomFlip:
elif flip_ratio is None:
pass
else:
raise ValueError('flip_ratios must be None, float, '
'or list of float')
raise TypeError('flip_ratios must be None, float, '
'or list of float')
self.flip_ratio = flip_ratio
valid_directions = ['horizontal', 'vertical', 'diagonal']
@ -1133,7 +1134,7 @@ class MMRandomFlip:
assert mmcv.is_list_of(direction, str)
assert set(direction).issubset(set(valid_directions))
else:
raise ValueError('direction must be either str or list of str')
raise TypeError('direction must be either str or list of str')
self.direction = direction
if isinstance(flip_ratio, list):
@ -1168,7 +1169,7 @@ class MMRandomFlip:
flipped[..., 2::4] = w - bboxes[..., 0::4]
flipped[..., 3::4] = h - bboxes[..., 1::4]
else:
raise ValueError(f"Invalid flipping direction '{direction}'")
raise KeyError(f"Invalid flipping direction '{direction}'")
return flipped
def __call__(self, results):
@ -1274,7 +1275,7 @@ class MMRandomCrop:
if crop_type not in [
'relative_range', 'relative', 'absolute', 'absolute_range'
]:
raise ValueError(f'Invalid crop_type {crop_type}.')
raise KeyError(f'Invalid crop_type {crop_type}.')
if crop_type in ['absolute', 'absolute_range']:
assert crop_size[0] > 0 and crop_size[1] > 0
assert isinstance(crop_size[0], int) and isinstance(

View File

@ -9,6 +9,7 @@ from easycv.datasets.detection.data_sources import DetSourceCoco
from easycv.datasets.registry import DATASETS
from easycv.datasets.shared.base import BaseDataset
from easycv.file.image import load_image
from easycv.framework.errors import TimeoutError
@DATASETS.register_module
@ -38,7 +39,7 @@ class DetDataset(BaseDataset):
count = 0
while True:
if count > 10:
raise RuntimeError('Loops timeout')
raise TimeoutError('Loops timeout')
data_dict = self.data_source[idx]
data_dict = self.pipeline(data_dict)
if data_dict is None:

View File

@ -11,10 +11,11 @@ from mmcv.runner import get_dist_info
from torch.utils.data import DataLoader, RandomSampler
from easycv.datasets.shared.odps_reader import set_dataloader_workid
from easycv.framework.errors import NotImplementedError
from easycv.utils.dist_utils import sync_random_seed
from easycv.utils.torchacc_util import is_torchacc_enabled
from .collate import CollateWrapper
from .sampler import DistributedMPSampler, DistributedSampler
from .sampler import DistributedMPSampler, DistributedSampler, RASampler
if platform.system() != 'Windows':
# https://github.com/pytorch/pytorch/issues/973
@ -35,6 +36,7 @@ def build_dataloader(dataset,
odps_config=None,
persistent_workers=False,
collate_hooks=None,
use_repeated_augment_sampler=False,
**kwargs):
"""Build PyTorch DataLoader.
In distributed training, each GPU/process has a dataloader.
@ -56,6 +58,8 @@ def build_dataloader(dataset,
data in worker process can be reused.
persistent_workers (bool) : After pytorch1.7, could use persistent_workers=True to
avoid reconstruct dataworker before each epoch, speed up before epoch
use_repeated_augment_sampler (bool) : If set true, it will use RASampler.
Default: False.
kwargs: any keyword argument to be used to initialize DataLoader
Returns:
DataLoader: A PyTorch dataloader.
@ -68,7 +72,9 @@ def build_dataloader(dataset,
'split_huge_listfile_byrank',
False)
if hasattr(dataset, 'm_per_class') and dataset.m_per_class > 1:
if use_repeated_augment_sampler:
sampler = RASampler(dataset, world_size, rank, shuffle=shuffle)
elif hasattr(dataset, 'm_per_class') and dataset.m_per_class > 1:
sampler = DistributedMPSampler(
dataset,
world_size,
@ -88,7 +94,10 @@ def build_dataloader(dataset,
else:
if replace:
raise NotImplementedError
if hasattr(dataset, 'm_per_class') and dataset.m_per_class > 1:
if use_repeated_augment_sampler:
sampler = RASampler(dataset, 1, 0, shuffle=shuffle)
elif hasattr(dataset, 'm_per_class') and dataset.m_per_class > 1:
sampler = DistributedMPSampler(
dataset, 1, 0, shuffle=shuffle, replace=replace)
else:

View File

@ -6,10 +6,13 @@ import random
import numpy as np
import torch
import torch.distributed as dist
from mmcv.runner import get_dist_info
from torch.utils.data import DistributedSampler as _DistributedSampler
from torch.utils.data import Sampler
from easycv.framework.errors import ValueError
class DistributedMPSampler(_DistributedSampler):
@ -83,7 +86,9 @@ class DistributedMPSampler(_DistributedSampler):
self.label_list = []
if not self.dataset.data_source.has_labels:
raise 'MPSampler need initial with classification datasets which has label!'
raise ValueError(
'MPSampler need initial with classification datasets which has label!'
)
for idx, label in enumerate(self.dataset.data_source.labels):
if label in self.label_dict.keys():
@ -469,3 +474,73 @@ class DistributedGivenIterationSampler(Sampler):
def set_epoch(self, epoch):
pass
class RASampler(torch.utils.data.Sampler):
"""Sampler that restricts data loading to a subset of the dataset for distributed,
with repeated augmentation.
It ensures that each augmented version of a sample will be visible to a
different process (GPU).
Heavily based on torch.utils.data.DistributedSampler
"""
def __init__(self,
dataset,
num_replicas=None,
rank=None,
shuffle=True,
num_repeats: int = 3):
if num_replicas is None:
if not dist.is_available():
raise RuntimeError(
'Requires distributed package to be available')
num_replicas = dist.get_world_size()
if rank is None:
if not dist.is_available():
raise RuntimeError(
'Requires distributed package to be available')
rank = dist.get_rank()
if num_repeats < 1:
raise ValueError('num_repeats should be greater than 0')
self.dataset = dataset
self.num_replicas = num_replicas
self.rank = rank
self.num_repeats = num_repeats
self.epoch = 0
self.num_samples = int(
math.ceil(
len(self.dataset) * self.num_repeats / self.num_replicas))
self.total_size = self.num_samples * self.num_replicas
# self.num_selected_samples = int(math.ceil(len(self.dataset) / self.num_replicas))
self.num_selected_samples = int(
math.floor(len(self.dataset) // 256 * 256 / self.num_replicas))
self.shuffle = shuffle
def __iter__(self):
if self.shuffle:
# deterministically shuffle based on epoch
g = torch.Generator()
g.manual_seed(self.epoch)
indices = torch.randperm(len(self.dataset), generator=g)
else:
indices = torch.arange(start=0, end=len(self.dataset))
# add extra samples to make it evenly divisible
indices = torch.repeat_interleave(
indices, repeats=self.num_repeats, dim=0).tolist()
padding_size: int = self.total_size - len(indices)
if padding_size > 0:
indices += indices[:padding_size]
assert len(indices) == self.total_size
# subsample
indices = indices[self.rank:self.total_size:self.num_replicas]
assert len(indices) == self.num_samples
return iter(indices[:self.num_selected_samples])
def __len__(self):
return self.num_selected_samples
def set_epoch(self, epoch):
self.epoch = epoch
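RASampler is wired into build_dataloader through the new use_repeated_augment_sampler flag shown earlier. A sketch of requesting it; the positional dataloader arguments are assumed to follow the usual mmcv-style signature and the batch settings are placeholders:

data_loader = build_dataloader(
    dataset,
    imgs_per_gpu=64,        # assumed positional/keyword args, placeholder values
    workers_per_gpu=4,
    dist=True,
    shuffle=True,
    use_repeated_augment_sampler=True)  # picks RASampler instead of the default sampler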

View File

@ -7,6 +7,7 @@ import json_tricks as json
import numpy as np
from easycv.datasets.registry import DATASOURCES
from easycv.framework.errors import ValueError
from .top_down import PoseTopDownSource
COCO_DATASET_INFO = dict(

View File

@ -12,6 +12,7 @@ from mmcv.utils.path import is_filepath
from xtcocotools.coco import COCO
from easycv.datasets.registry import DATASOURCES
from easycv.framework.errors import ValueError
class DatasetInfo:

View File

@ -6,6 +6,7 @@ from easycv.core.evaluation.keypoint_eval import KeyPointEvaluator
from easycv.datasets.pose.data_sources.coco import PoseTopDownSource
from easycv.datasets.registry import DATASETS
from easycv.datasets.shared.base import BaseDataset
from easycv.framework.errors import ValueError
@DATASETS.register_module()

View File

@ -9,6 +9,7 @@ from easycv.core.post_processing import (affine_transform, fliplr_joints,
get_affine_transform, get_warp_matrix,
warp_affine_joints)
from easycv.datasets.registry import PIPELINES
from easycv.framework.errors import ValueError
@PIPELINES.register_module()

View File

@ -3,6 +3,7 @@ from easycv.core.evaluation.coco_evaluation import CoCoPoseTopDownEvaluator
from easycv.datasets.pose.data_sources.coco import PoseTopDownSource
from easycv.datasets.registry import DATASETS
from easycv.datasets.shared.base import BaseDataset
from easycv.framework.errors import ValueError
@DATASETS.register_module()

View File

@ -12,6 +12,7 @@ from tqdm import tqdm
from easycv.datasets.registry import DATASOURCES
from easycv.file.image import load_image as _load_img
from easycv.framework.errors import NotImplementedError, ValueError
def load_image(img_path):
@ -26,7 +27,7 @@ def load_image(img_path):
def load_seg_map(seg_path, reduce_zero_label):
gt_semantic_seg = _load_img(seg_path, mode='RGB')
gt_semantic_seg = _load_img(seg_path, mode='P')
# reduce zero_label
if reduce_zero_label:
# avoid using underflow conversion

View File

@ -7,6 +7,7 @@ from PIL import Image, ImageFile
from easycv.datasets.registry import DATASOURCES
from easycv.file import io
from easycv.framework.errors import ValueError
@DATASOURCES.register_module

View File

@ -7,6 +7,7 @@ from easycv.datasets.builder import build_datasource
from easycv.datasets.registry import DATASETS, PIPELINES
from easycv.datasets.shared.base import BaseDataset
from easycv.datasets.shared.pipelines.transforms import Compose
from easycv.framework.errors import NotImplementedError
from easycv.utils.registry import build_from_cfg

View File

@ -7,6 +7,7 @@ import torch
from mmcv.parallel import DataContainer as DC
from easycv.datasets.registry import PIPELINES
from easycv.framework.errors import TypeError
def to_tensor(data):

View File

@ -6,6 +6,7 @@ import numpy as np
from easycv.datasets.registry import PIPELINES
from easycv.file.image import load_image
from easycv.framework.errors import TypeError
from easycv.utils.registry import build_from_cfg

View File

@ -1,5 +1,6 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from easycv.datasets.registry import DATASETS
from easycv.framework.errors import NotImplementedError
from .base import BaseDataset

View File

@ -9,6 +9,8 @@ from datetime import datetime
from functools import lru_cache
from typing import List, Union
from easycv.framework.errors import NotImplementedError
class IOBase:

View File

@ -11,6 +11,8 @@ from typing import List, Union
from tqdm import tqdm
from tqdm.utils import CallbackIOWrapper
from easycv.framework.errors import (FileNotFoundError, IOError, RuntimeError,
ValueError)
from .base import IOLocal
from .utils import (OSS_PREFIX, create_namedtuple, get_oss_config, is_oss_path,
mute_stderr, oss_progress)
@ -198,7 +200,7 @@ class IO(IOLocal):
time.sleep(3)
if data is None:
raise ValueError('Read file error: %s!' % full_path)
raise IOError('Read file error: %s!' % full_path)
if mode == 'rb':
return NullContextWrapper(BytesIO(data))
@ -519,6 +521,11 @@ class IO(IOLocal):
]
if path in files:
files.remove(path)
if recursive:
files = [
i for i in files
if not self.isdir(f'{OSS_PREFIX}{bucket.bucket_name}/{i}')
]
if not files and not self._obj_exists(bucket, path):
raise FileNotFoundError(

View File

@ -1,4 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import io
import logging
import time
@ -6,9 +7,10 @@ import cv2
import numpy as np
from PIL import Image
from easycv.file import io
from easycv import file
from easycv.framework.errors import IOError
from easycv.utils.constant import MAX_READ_IMAGE_TRY_TIMES
from .utils import is_oss_path
from .utils import is_oss_path, is_url_path
def load_image(img_path, mode='BGR', max_try_times=MAX_READ_IMAGE_TRY_TIMES):
@ -19,16 +21,31 @@ def load_image(img_path, mode='BGR', max_try_times=MAX_READ_IMAGE_TRY_TIMES):
img = None
while try_cnt < max_try_times:
try:
with io.open(img_path, 'rb') as infile:
# cv2.imdecode may corrupt when the img is broken
image = Image.open(infile) # RGB
if is_url_path(img_path):
from mmcv.fileio.file_client import HTTPBackend
client = HTTPBackend()
img_bytes = client.get(img_path)
buff = io.BytesIO(img_bytes)
image = Image.open(buff)
if mode.upper() != 'BGR' and image.mode.upper() != mode.upper(
):
image = image.convert(mode.upper())
img = np.asarray(image, dtype=np.uint8)
if mode.upper() == 'BGR':
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
assert mode.upper() in ['RGB', 'BGR'
], 'Only support `RGB` and `BGR` mode!'
assert img is not None
break
else:
with file.io.open(img_path, 'rb') as infile:
# cv2.imdecode may corrupt when the img is broken
image = Image.open(infile)
if mode.upper() != 'BGR' and image.mode.upper(
) != mode.upper():
image = image.convert(mode.upper())
img = np.asarray(image, dtype=np.uint8)
if mode.upper() == 'BGR':
if image.mode.upper() != 'RGB':
image = image.convert('RGB')
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
assert img is not None
break
except Exception as e:
logging.error(e)
logging.warning('Read file {} fault, try count : {}'.format(
@ -43,6 +60,6 @@ def load_image(img_path, mode='BGR', max_try_times=MAX_READ_IMAGE_TRY_TIMES):
try_cnt += 1
if img is None:
raise ValueError('Read Image Error: ' + img_path)
raise IOError('Read Image Error: ' + img_path)
return img
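With this change load_image resolves http(s) URLs through mmcv's HTTPBackend in addition to local and OSS paths. A small sketch (the URL is a placeholder):

from easycv.file.image import load_image

img = load_image('https://example.com/demo.jpg', mode='RGB')  # placeholder URL
print(img.shape, img.dtype)  # (H, W, 3) uint8 array in RGB order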

View File

@ -10,8 +10,10 @@ from io import StringIO
from tqdm import tqdm
from easycv.framework.errors import ValueError
OSS_PREFIX = 'oss://'
URL_PREFIX = 'https://'
URL_PREFIX = ('https://', 'http://')
def create_namedtuple(**kwargs):
@ -31,6 +33,7 @@ def url_path_exists(url):
urllib.request.urlopen(url).code
except Exception as err:
print(err)
return False
return True

View File

easycv/framework/errors.py (new file, 128 lines)
View File

@ -0,0 +1,128 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
# total 64 bit
# 63~64 (question category): 01 (user), ...
# 60~62 (error severity): 001 (ERROR), 010(WARNING), 011(INFO), 100 (DEBUG), ...
# 54~59 (product): 00000011 (PAI)
# 49~53 (sub product): 00000 (none)
# 41~48 (framework): 00000001 (tensorflow), 00000010 (pytorch)
# 1~40 (error code)
OK = 0x5818008000000000
RUNTIME = 0x4818008000000001
UNIMPLEMENTED = 0x4818008000000002
INVALID_ARGUMENT = 0x4818008000000003
INVALID_VALUE = 0x4818008000000004
INVALID_KEY = 0x4818008000000005
INVALID_TYPE = 0x4818008000000006
MODULE_NOT_FOUND = 0x4818008000000007
FILE_NOT_FOUND = 0x4818008000000008
IO_FAILED = 0x4818008000000009
PERMISSION_DENIED = 0x481800800000000a
TIMEOUT = 0x481800800000000b
class BaseError(Exception):
"""The base error class for exceptions.
"""
code = None
def __init__(self, message='', details=None, op=None):
"""Creates a new `OpError` indicating that a particular op failed.
Args:
message: The message string describing the failure.
details: The help message that handle the error.
op: The `ops.Operation` that failed, if known; otherwise None. During
eager execution, this field is always `None`.
"""
super(BaseError, self).__init__()
self._op = op
self._message = message
self._details = details
@property
def message(self):
"""The error message that describes the error."""
return self._message
@property
def details(self):
"""The help message that handle the error."""
return self._details
@property
def op(self):
"""The operation that failed, if known.
Returns:
The `Operation` that failed, or None.
"""
return self._op
@property
def error_code(self):
"""The integer error code that describes the error."""
return hex(self.code)
def __str__(self):
print_str = 'ErrorCode: ' + self.error_code
if self.op is not None:
print_str += '\n' + 'Operation: ' + str(self.op)
print_str += '\n' + 'Message: ' + self.message
if self.details is not None:
print_str += '\n' + 'Details: ' + self.details
return print_str
class NotImplementedError(BaseError):
"""Raised when an operation has not been implemented."""
code = UNIMPLEMENTED
class RuntimeError(BaseError):
"""Raised when the system experiences an internal error."""
code = RUNTIME
class PermissionDeniedError(BaseError):
"""Raised when the caller does not have permission to run an operation."""
code = PERMISSION_DENIED
class FileNotFoundError(BaseError):
"""Raised when a requested entity was not found."""
code = FILE_NOT_FOUND
class ModuleNotFoundError(BaseError):
"""Raised when a module could not be located."""
code = MODULE_NOT_FOUND
class InvalidArgumentError(BaseError):
"""Raised when an operation receives an invalid argument."""
code = INVALID_ARGUMENT
class TimeoutError(BaseError):
"""Raised when an operation timed out."""
code = TIMEOUT
class IOError(BaseError):
"""Raised when an operation returns a system-related error, including I/O failures."""
code = IO_FAILED
class ValueError(BaseError):
"""Raised when an operation receives an invalid value."""
code = INVALID_VALUE
class KeyError(BaseError):
"""Raised when a mapping (dictionary) key is not found in the set of existing keys."""
code = INVALID_KEY
class TypeError(BaseError):
"""Raised when an operation or function is applied to an object of inappropriate type."""
code = INVALID_TYPE
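The built-in exception names (ValueError, KeyError, TypeError, ...) are shadowed on purpose so call sites keep the familiar spelling while carrying a PAI error code. A small sketch of what a caller sees:

from easycv.framework.errors import ValueError  # intentionally shadows the builtin

try:
    raise ValueError('num_repeats should be greater than 0',
                     details='pass num_repeats >= 1')
except ValueError as e:
    print(e.error_code)  # '0x4818008000000004' (INVALID_VALUE)
    print(e.message)     # 'num_repeats should be greater than 0'
    print(e.details)     # 'pass num_repeats >= 1'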

View File

@ -13,7 +13,8 @@ from .eval_hook import DistEvalHook, EvalHook
from .export_hook import ExportHook
from .extractor import Extractor
from .logger import PreLoggerHook
from .lr_update_hook import StepFixCosineAnnealingLrUpdaterHook
from .lr_update_hook import (CosineAnnealingWarmupByEpochLrUpdaterHook,
StepFixCosineAnnealingLrUpdaterHook)
from .optimizer_hook import OptimizerHook
from .oss_sync_hook import OSSSyncHook
from .registry import HOOKS
@ -33,7 +34,8 @@ __all__ = [
'OSSSyncHook', 'HOOKS', 'TIMEHook', 'SWAVHook', 'SyncNormHook',
'SyncRandomSizeHook', 'TensorboardLoggerHookV2', 'WandbLoggerHookV2',
'YOLOXLrUpdaterHook', 'YOLOXModeSwitchHook', 'MixupCollateHook',
'PreLoggerHook', 'StepFixCosineAnnealingLrUpdaterHook', 'ThroughputHook'
'PreLoggerHook', 'StepFixCosineAnnealingLrUpdaterHook',
'CosineAnnealingWarmupByEpochLrUpdaterHook', 'ThroughputHook'
]
if LooseVersion(torch.__version__) >= LooseVersion('1.6.0'):

View File

@ -7,6 +7,7 @@ from mmcv.runner import Hook
from torch.utils.data import DataLoader
from easycv.datasets.loader.loader_wrapper import TorchaccLoaderWrapper
from easycv.framework.errors import TypeError
from easycv.hooks.tensorboard import TensorboardLoggerHookV2
from easycv.hooks.wandb import WandbLoggerHookV2

View File

@ -2,6 +2,7 @@
import torch.nn as nn
from torch.utils.data import Dataset
from easycv.framework.errors import TypeError
from easycv.utils.collect import dist_forward_collect, nondist_forward_collect

View File

@ -1,4 +1,5 @@
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv import runner
from mmcv.runner import HOOKS
from mmcv.runner.hooks.lr_updater import (CosineAnnealingLrUpdaterHook,
annealing_cos)
@ -54,3 +55,29 @@ class StepFixCosineAnnealingLrUpdaterHook(CosineAnnealingLrUpdaterHook):
target_lr = self.min_lr
return annealing_cos(base_lr, target_lr, progress / max_progress)
@HOOKS.register_module()
class CosineAnnealingWarmupByEpochLrUpdaterHook(CosineAnnealingLrUpdaterHook):
def before_train_iter(self, runner: 'runner.BaseRunner'):
cur_iter = runner.iter
epoch_len = len(runner.data_loader)
assert isinstance(self.warmup_iters, int)
if not self.by_epoch:
self.regular_lr = self.get_regular_lr(runner)
if self.warmup is None or cur_iter >= self.warmup_iters:
self._set_lr(runner, self.regular_lr)
else:
if cur_iter % epoch_len == 0:
warmup_lr = self.get_warmup_lr(cur_iter)
self._set_lr(runner, warmup_lr)
elif self.by_epoch:
if self.warmup is None or cur_iter > self.warmup_iters:
return
elif cur_iter == self.warmup_iters:
self._set_lr(runner, self.regular_lr)
else:
if cur_iter % epoch_len == 0:
warmup_lr = self.get_warmup_lr(cur_iter)
self._set_lr(runner, warmup_lr)
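A sketch of an lr_config selecting the new hook, assuming mmcv's usual convention of deriving the hook name from the policy string by appending 'LrUpdaterHook'; the warmup numbers are placeholders:

lr_config = dict(
    policy='CosineAnnealingWarmupByEpoch',
    by_epoch=True,
    min_lr=1e-6,
    warmup='linear',
    warmup_iters=5,          # counted in epochs when warmup_by_epoch=True
    warmup_by_epoch=True,
    warmup_ratio=1e-3)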

View File

@ -6,6 +6,7 @@ import torch
from mmcv.parallel import is_module_wrapper
from mmcv.runner import OptimizerHook as _OptimizerHook
from easycv.framework.errors import TypeError
from easycv.utils.dist_utils import get_dist_info
from easycv.utils.torchacc_util import is_torchacc_enabled
@ -134,7 +135,7 @@ class AMPFP16OptimizerHook(OptimizerHook):
elif isinstance(loss_scale, dict):
self.scaler = amp.GradScaler(**loss_scale)
else:
raise ValueError(
raise TypeError(
'`loss_scale` type must be in [float, dict], but got {loss_scale}'
)

View File

@ -21,4 +21,5 @@ from .resnet_jit import ResNetJIT
from .resnext import ResNeXt
from .shuffle_transformer import ShuffleTransformer
from .swin_transformer import SwinTransformer
from .vision_transformer import VisionTransformer
from .vitdet import ViTDet

View File

@ -10,7 +10,7 @@ from timm.models.layers import trunc_normal_
from easycv.models.registry import BACKBONES
from easycv.models.utils import DropPath
from easycv.models.utils.pos_embed import get_2d_sincos_pos_embed
from .vit_transfomer_dynamic import Block
from .vision_transformer import Block
class PatchEmbed(nn.Module):

View File

@ -7,6 +7,7 @@ from mmcv.cnn import (build_conv_layer, build_norm_layer, constant_init,
normal_init)
from torch.nn.modules.batchnorm import _BatchNorm
from easycv.framework.errors import NotImplementedError, TypeError, ValueError
from easycv.models.registry import BACKBONES
from ..modelzoo import hrnet as model_urls
from .resnet import BasicBlock

View File

@ -11,6 +11,7 @@ from mmcv.cnn import (ConvModule, DepthwiseSeparableConvModule,
normal_init)
from torch.nn.modules.batchnorm import _BatchNorm
from easycv.framework.errors import ValueError
from easycv.models.registry import BACKBONES

View File

@ -13,6 +13,7 @@ from mmcv.cnn.utils.weight_init import (constant_init, normal_init,
trunc_normal_init)
from mmcv.runner import BaseModule, ModuleList, Sequential
from easycv.framework.errors import TypeError
from easycv.models.registry import BACKBONES
from easycv.models.segmentation.utils import (PatchEmbed, nchw_to_nlc,
nlc_to_nchw)

View File

@ -5,6 +5,7 @@ r""" This model is taken from the official PyTorch model zoo.
from torch import nn
from easycv.framework.errors import ValueError
from ..modelzoo import mobilenetv2 as model_urls
from ..registry import BACKBONES

View File

@ -7,6 +7,7 @@ import torch.nn as nn
from timm.models.helpers import load_pretrained
from timm.models.hub import download_cached_file
from easycv.framework.errors import ValueError
from easycv.utils.logger import get_root_logger, print_log
from ..modelzoo import timm_models as model_urls
from ..registry import BACKBONES
@ -16,11 +17,11 @@ from .shuffle_transformer import (shuffletrans_base_p4_w7_224,
from .swin_transformer_dynamic import (dynamic_swin_base_p4_w7_224,
dynamic_swin_small_p4_w7_224,
dynamic_swin_tiny_p4_w7_224)
from .vit_transfomer_dynamic import (dynamic_deit_small_p16,
dynamic_deit_tiny_p16,
dynamic_vit_base_p16,
dynamic_vit_huge_p14,
dynamic_vit_large_p16)
from .vit_transformer_dynamic import (dynamic_deit_small_p16,
dynamic_deit_tiny_p16,
dynamic_vit_base_p16,
dynamic_vit_huge_p14,
dynamic_vit_large_p16)
from .xcit_transformer import (xcit_large_24_p8, xcit_medium_24_p8,
xcit_medium_24_p16, xcit_small_12_p8,
xcit_small_12_p16)
@ -36,7 +37,7 @@ _MODEL_MAP = {
'dynamic_swin_small_p4_w7_224': dynamic_swin_small_p4_w7_224,
'dynamic_swin_base_p4_w7_224': dynamic_swin_base_p4_w7_224,
# vit_transfomer_dynamic
# vit_transformer_dynamic
'dynamic_deit_small_p16': dynamic_deit_small_p16,
'dynamic_deit_tiny_p16': dynamic_deit_tiny_p16,
'dynamic_vit_base_p16': dynamic_vit_base_p16,

View File

@ -14,6 +14,7 @@ import torch.nn.functional as F
from torch.nn import Conv2d, Module, ReLU
from torch.nn.modules.utils import _pair
from easycv.framework.errors import KeyError, NotImplementedError, RuntimeError
from ..registry import BACKBONES

View File

@ -4,6 +4,7 @@ import torch.utils.checkpoint as cp
from mmcv.cnn import constant_init, kaiming_init
from torch.nn.modules.batchnorm import _BatchNorm
from easycv.framework.errors import KeyError
from ..modelzoo import resnet as model_urls
from ..registry import BACKBONES
from ..utils import FReLU, build_conv_layer, build_norm_layer

View File

@ -6,6 +6,7 @@ import torch.nn as nn
from mmcv.cnn import constant_init, kaiming_init
from torch.nn.modules.batchnorm import _BatchNorm
from easycv.framework.errors import KeyError
from ..registry import BACKBONES
from ..utils import build_conv_layer, build_norm_layer

View File

@ -7,6 +7,7 @@ from einops import rearrange
from timm.models.layers import DropPath, trunc_normal_
from torch import nn
from easycv.framework.errors import NotImplementedError
from ..registry import BACKBONES

View File

@ -0,0 +1,283 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
"""
Mostly copy-paste from timm library.
https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
"""
from functools import partial
import torch
import torch.nn as nn
from timm.models.layers import trunc_normal_
from easycv.models.utils import DropPath, Mlp
from ..registry import BACKBONES
class Attention(nn.Module):
def __init__(self,
dim,
num_heads=8,
qkv_bias=False,
qk_scale=None,
attn_drop=0.,
proj_drop=0.):
super().__init__()
self.num_heads = num_heads
head_dim = dim // num_heads
self.scale = qk_scale or head_dim**-0.5
self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
self.attn_drop = nn.Dropout(attn_drop)
self.proj = nn.Linear(dim, dim)
self.proj_drop = nn.Dropout(proj_drop)
def forward(self, x, rel_pos_bias=None):
B, N, C = x.shape
qkv = self.qkv(x).reshape(B, N, 3, self.num_heads,
C // self.num_heads).permute(2, 0, 3, 1, 4)
q, k, v = qkv[0], qkv[1], qkv[2]
attn = (q @ k.transpose(-2, -1)) * self.scale
if rel_pos_bias is not None:
attn = attn + rel_pos_bias
attn = attn.softmax(dim=-1)
attn = self.attn_drop(attn)
x = (attn @ v).transpose(1, 2).reshape(B, N, C)
x = self.proj(x)
x = self.proj_drop(x)
return x, attn
class Block(nn.Module):
def __init__(self,
dim,
num_heads,
mlp_ratio=4.,
qkv_bias=False,
qk_scale=None,
drop=0.,
attn_drop=0.,
drop_path=0.,
act_layer=nn.GELU,
norm_layer=nn.LayerNorm,
use_layer_scale=False,
init_values=1e-4):
super().__init__()
self.norm1 = norm_layer(dim)
self.attn = Attention(
dim,
num_heads=num_heads,
qkv_bias=qkv_bias,
qk_scale=qk_scale,
attn_drop=attn_drop,
proj_drop=drop)
self.drop_path = DropPath(
drop_path) if drop_path > 0. else nn.Identity()
self.norm2 = norm_layer(dim)
mlp_hidden_dim = int(dim * mlp_ratio)
self.mlp = Mlp(
in_features=dim,
hidden_features=mlp_hidden_dim,
act_layer=act_layer,
drop=drop)
self.use_layer_scale = use_layer_scale
if self.use_layer_scale:
self.gamma_1 = nn.Parameter(
init_values * torch.ones((dim)), requires_grad=True)
self.gamma_2 = nn.Parameter(
init_values * torch.ones((dim)), requires_grad=True)
def forward(self, x, return_attention=False, rel_pos_bias=None):
y, attn = self.attn(self.norm1(x), rel_pos_bias=rel_pos_bias)
if return_attention:
return attn
if self.use_layer_scale:
x = x + self.drop_path(self.gamma_1 * y)
x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x)))
else:
x = x + self.drop_path(y)
x = x + self.drop_path(self.mlp(self.norm2(x)))
return x
def forward_fea_and_attn(self, x):
y, attn = self.attn(self.norm1(x))
if self.use_layer_scale:
x = x + self.drop_path(self.gamma_1 * y)
x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x)))
else:
x = x + self.drop_path(y)
x = x + self.drop_path(self.mlp(self.norm2(x)))
return x, attn
class PatchEmbed(nn.Module):
""" Image to Patch Embedding
"""
def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
super().__init__()
num_patches = (img_size // patch_size) * (img_size // patch_size)
self.img_size = img_size
self.patch_size = patch_size
self.num_patches = num_patches
self.proj = nn.Conv2d(
in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
def forward(self, x):
B, C, H, W = x.shape
x = self.proj(x).flatten(2).transpose(1, 2)
return x
@BACKBONES.register_module
class VisionTransformer(nn.Module):
""" DeiT III is based on ViT. It uses some strategies to make the vit model
better, just like layer scale, stochastic depth, 3-Augment.
Paper link: https://arxiv.org/pdf/2204.07118.pdf (DeiT III: Revenge of the ViT)
Args:
img_size (list): Input image size. img_size=[224] means the image size is
224*224. img_size=[192, 224] means the image size is 192*224.
patch_size (int): The patch size. Default: 16
in_chans (int): The num of input channels. Default: 3
num_classes (int): The num of picture classes. Default: 1000
embed_dim (int): The dimensions of embedding. Default: 768
depth (int): The num of blocks. Default: 12
num_heads (int): Parallel attention heads. Default: 12
mlp_ratio (float): Mlp expansion ratio. Default: 4.0
qkv_bias (bool): Whether the qkv projection uses a bias term. Default: False
qk_scale (float | None): In the step of self-attention, if qk_scale is not
None, it will use qk_scale to scale the q @ k. Otherwise it will use
head_dim**-0.5 instead of qk_scale. Default: None
drop_rate (float): Probability of an element to be zeroed after the feed
forward layer. Default: 0.0
drop_path_rate (float): Stochastic depth rate. Default: 0
norm_layer (nn.Module): normalization layer
global_pool (bool): Global pool before head. Default: False
use_layer_scale (bool): If use_layer_scale is True, it will use layer
scale. Default: False
init_scale (float): It is used for layer scale in Block to scale the
gamma_1 and gamma_2.
"""
def __init__(self,
img_size=[224],
patch_size=16,
in_chans=3,
num_classes=1000,
embed_dim=768,
depth=12,
num_heads=12,
mlp_ratio=4.,
qkv_bias=False,
qk_scale=None,
drop_rate=0.,
attn_drop_rate=0.,
drop_path_rate=0.,
norm_layer=partial(nn.LayerNorm, eps=1e-6),
global_pool=False,
use_layer_scale=False,
init_scale=1e-4,
**kwargs):
super().__init__()
self.num_features = self.embed_dim = embed_dim
self.num_heads = num_heads
self.mlp_ratio = mlp_ratio
self.qkv_bias = qkv_bias
self.qk_scale = qk_scale
self.drop_rate = drop_rate
self.attn_drop_rate = attn_drop_rate
self.norm_layer = norm_layer
self.use_layer_scale = use_layer_scale
self.init_scale = init_scale
self.patch_embed = PatchEmbed(
img_size=img_size[0],
patch_size=patch_size,
in_chans=in_chans,
embed_dim=embed_dim)
num_patches = self.patch_embed.num_patches
self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
self.pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim))
self.pos_drop = nn.Dropout(p=drop_rate)
self.drop_path_rate = drop_path_rate
self.depth = depth
dpr = [drop_path_rate for i in range(depth)]
self.blocks = nn.ModuleList([
Block(
dim=embed_dim,
num_heads=num_heads,
mlp_ratio=mlp_ratio,
qkv_bias=qkv_bias,
qk_scale=qk_scale,
drop=drop_rate,
attn_drop=attn_drop_rate,
drop_path=dpr[i],
norm_layer=norm_layer,
use_layer_scale=use_layer_scale,
init_values=init_scale) for i in range(depth)
])
self.norm = norm_layer(embed_dim)
# Classifier head
self.head = nn.Linear(
embed_dim, num_classes) if num_classes > 0 else nn.Identity()
# Use global average pooling
self.global_pool = global_pool
if self.global_pool:
self.fc_norm = norm_layer(embed_dim)
self.norm = None
def init_weights(self):
trunc_normal_(self.pos_embed, std=.02)
trunc_normal_(self.cls_token, std=.02)
for m in self.modules():
if isinstance(m, nn.Linear):
trunc_normal_(m.weight, std=.02)
if isinstance(m, nn.Linear) and m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.LayerNorm):
nn.init.constant_(m.bias, 0)
nn.init.constant_(m.weight, 1.0)
def forward(self, x):
x = self.forward_features(x)
x = self.pos_drop(x)
x = self.head(x)
return [x]
def forward_features(self, x):
B = x.shape[0]
x = self.patch_embed(x)
cls_tokens = self.cls_token.expand(B, -1, -1)
x = x + self.pos_embed
x = torch.cat((cls_tokens, x), dim=1)
for blk in self.blocks:
x = blk(x)
if self.norm is not None:
x = self.norm(x)
if self.global_pool:
x = x[:, 1:, :].mean(dim=1)
return self.fc_norm(x)
else:
return x[:, 0]
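A minimal forward sketch of the backbone defined above (input shape and the default 1000-class head are assumptions for illustration):

import torch

vit = VisionTransformer(
    img_size=[224], patch_size=16, embed_dim=768, depth=12, num_heads=12,
    use_layer_scale=True, drop_path_rate=0.1)
vit.init_weights()
out = vit(torch.randn(2, 3, 224, 224))
print(out[0].shape)  # torch.Size([2, 1000]) -- forward returns a list with the logits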

View File

@ -12,198 +12,51 @@ from functools import partial
import torch
import torch.nn as nn
from timm.models.layers import trunc_normal_
from easycv.models.utils import DropPath, Mlp
from easycv.models.backbones.vision_transformer import Block, VisionTransformer
class Attention(nn.Module):
class DynamicVisionTransformer(VisionTransformer):
"""Dynamic Vision Transformer
def __init__(self,
dim,
num_heads=8,
qkv_bias=False,
qk_scale=None,
attn_drop=0.,
proj_drop=0.):
super().__init__()
self.num_heads = num_heads
head_dim = dim // num_heads
self.scale = qk_scale or head_dim**-0.5
Args:
use_dense_prediction (bool): If True, the global pool and norm before the
head will be removed (if any). Default: False
self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
self.attn_drop = nn.Dropout(attn_drop)
self.proj = nn.Linear(dim, dim)
self.proj_drop = nn.Dropout(proj_drop)
def forward(self, x, rel_pos_bias=None):
B, N, C = x.shape
qkv = self.qkv(x).reshape(B, N, 3, self.num_heads,
C // self.num_heads).permute(2, 0, 3, 1, 4)
q, k, v = qkv[0], qkv[1], qkv[2]
attn = (q @ k.transpose(-2, -1)) * self.scale
if rel_pos_bias is not None:
attn = attn + rel_pos_bias
attn = attn.softmax(dim=-1)
attn = self.attn_drop(attn)
x = (attn @ v).transpose(1, 2).reshape(B, N, C)
x = self.proj(x)
x = self.proj_drop(x)
return x, attn
class Block(nn.Module):
def __init__(self,
dim,
num_heads,
mlp_ratio=4.,
qkv_bias=False,
qk_scale=None,
drop=0.,
attn_drop=0.,
drop_path=0.,
act_layer=nn.GELU,
norm_layer=nn.LayerNorm):
super().__init__()
self.norm1 = norm_layer(dim)
self.attn = Attention(
dim,
num_heads=num_heads,
qkv_bias=qkv_bias,
qk_scale=qk_scale,
attn_drop=attn_drop,
proj_drop=drop)
self.drop_path = DropPath(
drop_path) if drop_path > 0. else nn.Identity()
self.norm2 = norm_layer(dim)
mlp_hidden_dim = int(dim * mlp_ratio)
self.mlp = Mlp(
in_features=dim,
hidden_features=mlp_hidden_dim,
act_layer=act_layer,
drop=drop)
def forward(self, x, return_attention=False, rel_pos_bias=None):
y, attn = self.attn(self.norm1(x), rel_pos_bias=rel_pos_bias)
if return_attention:
return attn
x = x + self.drop_path(y)
x = x + self.drop_path(self.mlp(self.norm2(x)))
return x
def forward_fea_and_attn(self, x):
y, attn = self.attn(self.norm1(x))
x = x + self.drop_path(y)
x = x + self.drop_path(self.mlp(self.norm2(x)))
return x, attn
class PatchEmbed(nn.Module):
""" Image to Patch Embedding
"""
def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
super().__init__()
num_patches = (img_size // patch_size) * (img_size // patch_size)
self.img_size = img_size
self.patch_size = patch_size
self.num_patches = num_patches
def __init__(self, use_dense_prediction=False, **kwargs):
super(DynamicVisionTransformer, self).__init__(**kwargs)
self.proj = nn.Conv2d(
in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
def forward(self, x):
B, C, H, W = x.shape
x = self.proj(x).flatten(2).transpose(1, 2)
return x
class DynamicVisionTransformer(nn.Module):
"""Dynamic Vision Transformer """
def __init__(self,
img_size=[224],
patch_size=16,
in_chans=3,
num_classes=0,
embed_dim=768,
depth=12,
num_heads=12,
mlp_ratio=4.,
qkv_bias=False,
qk_scale=None,
drop_rate=0.,
attn_drop_rate=0.,
drop_path_rate=0.,
norm_layer=nn.LayerNorm,
use_dense_prediction=False,
global_pool=False,
**kwargs):
super().__init__()
self.num_features = self.embed_dim = embed_dim
self.patch_embed = PatchEmbed(
img_size=img_size[0],
patch_size=patch_size,
in_chans=in_chans,
embed_dim=embed_dim)
num_patches = self.patch_embed.num_patches
self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
self.pos_embed = nn.Parameter(
torch.zeros(1, num_patches + 1, embed_dim))
self.pos_drop = nn.Dropout(p=drop_rate)
torch.zeros(1, num_patches + 1, self.embed_dim))
dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)
] # stochastic depth decay rule
dpr = [
x.item()
for x in torch.linspace(0, self.drop_path_rate, self.depth)
]
self.blocks = nn.ModuleList([
Block(
dim=embed_dim,
num_heads=num_heads,
mlp_ratio=mlp_ratio,
qkv_bias=qkv_bias,
qk_scale=qk_scale,
drop=drop_rate,
attn_drop=attn_drop_rate,
dim=self.embed_dim,
num_heads=self.num_heads,
mlp_ratio=self.mlp_ratio,
qkv_bias=self.qkv_bias,
qk_scale=self.qk_scale,
drop=self.drop_rate,
attn_drop=self.attn_drop_rate,
drop_path=dpr[i],
norm_layer=norm_layer) for i in range(depth)
norm_layer=self.norm_layer,
use_layer_scale=self.use_layer_scale,
init_values=self.init_scale) for i in range(self.depth)
])
self.norm = norm_layer(embed_dim)
# Classifier head
self.head = nn.Linear(
embed_dim, num_classes) if num_classes > 0 else nn.Identity()
# Dense prediction head
self.use_dense_prediction = use_dense_prediction
if self.use_dense_prediction:
self.head_dense = None
# Use global average pooling
self.global_pool = global_pool
if self.global_pool:
self.fc_norm = norm_layer(embed_dim)
self.norm = None
trunc_normal_(self.pos_embed, std=.02)
trunc_normal_(self.cls_token, std=.02)
def init_weights(self):
for m in self.modules():
if isinstance(m, nn.Linear):
trunc_normal_(m.weight, std=.02)
if isinstance(m, nn.Linear) and m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.LayerNorm):
nn.init.constant_(m.bias, 0)
nn.init.constant_(m.weight, 1.0)
def forward(self, x):
# convert to list
if not isinstance(x, list):
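The dpr list built in both versions of __init__ is the stochastic depth decay rule: the drop-path probability grows linearly from 0 at the first block to drop_path_rate at the last. A minimal sketch of the computation (the depth and drop_path_rate values below are illustrative defaults, not taken from a specific config):

import torch

depth, drop_path_rate = 12, 0.1
dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]
print([round(r, 3) for r in dpr])
# [0.0, 0.009, 0.018, ..., 0.1] -- early blocks are almost never dropped,
# the final block is dropped with probability drop_path_rate.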

File diff suppressed because it is too large

View File

@@ -19,6 +19,7 @@ import torch.nn as nn
 from timm.models.layers import DropPath, to_2tuple, trunc_normal_
 from timm.models.vision_transformer import Mlp, _cfg
+from easycv.framework.errors import ValueError
 from ..registry import BACKBONES
@@ -109,7 +110,7 @@ class ConvPatchEmbed(nn.Module):
                 conv3x3(embed_dim // 2, embed_dim, 2),
             )
         else:
-            raise (
+            raise ValueError(
                 'For convolutional projection, patch size has to be in [8, 16]'
             )
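The new raise relies on the ValueError imported from easycv.framework.errors, which shadows the built-in at the call site. A minimal sketch of how such an error type could be defined, assuming it merely subclasses the built-in and carries an error code (the error_code attribute and its default value are assumptions, not shown in this diff):

# Hypothetical sketch only -- not the contents of easycv/framework/errors.py.
_UNKNOWN_ERROR = 255  # assumed default error code


class ValueError(ValueError):  # shadows the built-in once imported
    """ValueError that also carries a framework error code."""

    def __init__(self, *args, error_code=_UNKNOWN_ERROR):
        super().__init__(*args)
        self.error_code = error_code


try:
    raise ValueError('For convolutional projection, patch size has to be in [8, 16]')
except ValueError as err:
    print(type(err).__name__, err.error_code, err)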

View File

@@ -8,6 +8,8 @@ import torch.distributed as dist
 import torch.nn as nn
 from torch import Tensor
+from easycv.framework.errors import NotImplementedError, TypeError
 class BaseModel(nn.Module, metaclass=ABCMeta):
     ''' base class for model. '''

View File

@@ -7,6 +7,7 @@ import torch.nn as nn
 from mmcv.runner import get_dist_info
 from timm.data.mixup import Mixup
+from easycv.framework.errors import KeyError, NotImplementedError, ValueError
 from easycv.utils.checkpoint import load_checkpoint
 from easycv.utils.logger import get_root_logger, print_log
 from easycv.utils.preprocess_function import (bninceptionPre, gaussianBlur,
@@ -53,22 +54,15 @@ class Classification(BaseModel):
         if 'mixUp' in train_preprocess:
             rank, _ = get_dist_info()
             np.random.seed(rank + 12)
-            if not mixup_cfg:
-                num_classes = head.get(
-                    'num_classes',
-                    1000) if 'num_classes' in head else backbone.get(
-                        'num_classes', 1000)
-                mixup_cfg = dict(
-                    mixup_alpha=0.8,
-                    cutmix_alpha=1.0,
-                    cutmix_minmax=None,
-                    prob=1.0,
-                    switch_prob=0.5,
-                    mode='batch',
-                    label_smoothing=0.1,
-                    num_classes=num_classes)
-            self.mixup = Mixup(**mixup_cfg)
-            head.loss_config = {'type': 'SoftTargetCrossEntropy'}
+            if mixup_cfg is not None:
+                if 'num_classes' in mixup_cfg:
+                    self.mixup = Mixup(**mixup_cfg)
+                elif 'num_classes' in head or 'num_classes' in backbone:
+                    num_classes = head.get(
+                        'num_classes'
+                    ) if 'num_classes' in head else backbone.get('num_classes')
+                    mixup_cfg['num_classes'] = num_classes
+                    self.mixup = Mixup(**mixup_cfg)
             train_preprocess.remove('mixUp')
         self.train_preprocess = [
             self.preprocess_key_map[i] for i in train_preprocess
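The new branch above only builds Mixup once it can resolve num_classes, preferring an explicit value in mixup_cfg and otherwise falling back to the head or backbone config. A minimal sketch of that resolution with hypothetical config dicts (the dict contents below are illustrative, not taken from a repository config):

from timm.data.mixup import Mixup  # same dependency the code above uses

head = dict(type='ClsHead', num_classes=1000)   # hypothetical head config
backbone = dict(type='ResNet', depth=50)        # hypothetical backbone config
mixup_cfg = dict(mixup_alpha=0.8, cutmix_alpha=1.0, prob=1.0,
                 switch_prob=0.5, mode='batch', label_smoothing=0.1)

if 'num_classes' not in mixup_cfg:
    if 'num_classes' in head or 'num_classes' in backbone:
        mixup_cfg['num_classes'] = (head.get('num_classes')
                                    if 'num_classes' in head else
                                    backbone.get('num_classes'))

mixup = Mixup(**mixup_cfg)        # only constructed once num_classes is known
print(mixup_cfg['num_classes'])   # -> 1000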
@@ -173,7 +167,10 @@ class Classification(BaseModel):
         for preprocess in self.train_preprocess:
             img = preprocess(img)
-        if hasattr(self, 'mixup'):
+        # When the number of samples in the dataset is odd, the last batch of each
+        # epoch has an odd size, which makes mixup raise an error. To avoid this,
+        # mixup is applied only when the batch size is even.
+        if hasattr(self, 'mixup') and len(img) % 2 == 0:
             img, gt_labels = self.mixup(img, gt_labels)
         x = self.forward_backbone(img)
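The guard above only matters for the last batch of an epoch; a small arithmetic sketch of when it triggers (the dataset and batch sizes below are made up for illustration):

import math

dataset_size, batch_size = 1001, 32          # illustrative numbers
num_batches = math.ceil(dataset_size / batch_size)
last_batch = dataset_size % batch_size or batch_size
print(num_batches, last_batch)               # 32 batches; the last one holds 9 samples

for n in (batch_size, last_batch):
    print(n, 'mixup applied' if n % 2 == 0 else 'mixup skipped')
# 32 -> mixup applied, 9 -> mixup skipped (an odd batch would trigger the error noted above)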
@@ -304,4 +301,4 @@ class Classification(BaseModel):
             rv['gt_labels'] = gt_labels.cpu()
             return rv
         else:
-            raise Exception('No such mode: {}'.format(mode))
+            raise KeyError('No such mode: {}'.format(mode))

View File

@@ -29,6 +29,8 @@ from torch.nn.init import constant_
 from torch.nn.modules.linear import Linear
 from torch.nn.modules.module import Module
+from easycv.framework.errors import RuntimeError
 try:
     from torch.overrides import has_torch_function, handle_torch_function
 except:
View File

@@ -14,6 +14,7 @@ import torch
 import torch.nn.functional as F
 from torch import Tensor, nn
+from easycv.framework.errors import NotImplementedError, ValueError
 from easycv.models.builder import NECKS
 from easycv.models.detection.utils import inverse_sigmoid
 from easycv.models.utils import (MLP, TransformerEncoder,

View File

@@ -1,4 +1,5 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
+from easycv.framework.errors import ValueError
 from easycv.models.base import BaseModel
 from easycv.models.builder import (MODELS, build_backbone, build_head,
                                    build_neck)

Some files were not shown because too many files have changed in this diff