From 7301f8c7b63453264b37ec7df31e302b25f3930b Mon Sep 17 00:00:00 2001
From: Cathy0908 <30484308+Cathy0908@users.noreply.github.com>
Date: Wed, 14 Sep 2022 12:04:52 +0800
Subject: [PATCH 1/9] fix io.copytree (#193)
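A minimal sketch of the fixed behavior, mirroring the new unit test below (the
bucket path here is a placeholder and OSS access is assumed to be configured):

    from easycv.file import io

    # Copy a nested OSS directory to a local dir; before this fix, OSS
    # "directory" keys leaked into the recursive listing and broke the copy.
    io.copytree('oss://my-bucket/data/io_test_dir/multi_dirs/', '/tmp/multi_dirs')
    print(io.listdir('/tmp/multi_dirs', recursive=True))
    # expected: ['dir1/a.txt', 'dir1/dir1_1/a.txt', 'dir1/dir1_1/b.txt', 'dir2/b.txt']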
---
easycv/file/file_io.py | 5 +++++
tests/file/test_file_io.py | 25 ++++++++++++++++++++++++-
tests/ut_config.py | 3 ++-
3 files changed, 31 insertions(+), 2 deletions(-)
diff --git a/easycv/file/file_io.py b/easycv/file/file_io.py
index 50e052eb..04e13743 100644
--- a/easycv/file/file_io.py
+++ b/easycv/file/file_io.py
@@ -519,6 +519,11 @@ class IO(IOLocal):
]
if path in files:
files.remove(path)
+ if recursive:
+ files = [
+ i for i in files
+ if not self.isdir(f'{OSS_PREFIX}{bucket.bucket_name}/{i}')
+ ]
if not files and not self._obj_exists(bucket, path):
raise FileNotFoundError(
diff --git a/tests/file/test_file_io.py b/tests/file/test_file_io.py
index e67f75ed..31929f0c 100644
--- a/tests/file/test_file_io.py
+++ b/tests/file/test_file_io.py
@@ -7,7 +7,8 @@ import unittest
import uuid
from tests.ut_config import (BASE_LOCAL_PATH, CLS_DATA_NPY_LOCAL,
- CLS_DATA_NPY_OSS, IO_DATA_TXTX_OSS, TMP_DIR_OSS)
+ CLS_DATA_NPY_OSS, IO_DATA_MULTI_DIRS_OSS,
+ IO_DATA_TXTX_OSS, TMP_DIR_OSS)
from easycv.file import io
@@ -128,6 +129,28 @@ class IOForOSSTest(unittest.TestCase):
io.remove(temp_dir)
io.remove(oss_file_path2)
+ def test_copytree_multi_dirs(self):
+ target = [
+ 'dir1/a.txt', 'dir1/dir1_1/a.txt', 'dir1/dir1_1/b.txt',
+ 'dir2/b.txt'
+ ]
+ # test copy dir from oss to local
+ oss_file_path1 = IO_DATA_MULTI_DIRS_OSS
+ temp_dir = tempfile.TemporaryDirectory().name
+ io.copytree(oss_file_path1, temp_dir)
+ self.assertTrue(io.exists(temp_dir))
+ self.assertCountEqual(io.listdir(temp_dir, recursive=True), target)
+
+ # test copy dir from local to oss
+ oss_file_path2 = os.path.join(TMP_DIR_OSS, '%s' % uuid.uuid4().hex)
+ io.copytree(temp_dir, oss_file_path2)
+ self.assertTrue(io.exists(oss_file_path2))
+ self.assertCountEqual(
+ io.listdir(oss_file_path2, recursive=True), target)
+
+ io.remove(temp_dir)
+ io.remove(oss_file_path2)
+
def test_listdir(self):
# with suffix /
files = io.listdir(IO_DATA_TXTX_OSS.rstrip('/') + '/')
diff --git a/tests/ut_config.py b/tests/ut_config.py
index 64284b1d..59f32e63 100644
--- a/tests/ut_config.py
+++ b/tests/ut_config.py
@@ -45,7 +45,8 @@ SMALL_IMAGENET_TFRECORD_OSS = os.path.join(
BASE_OSS_PATH, 'data/classification/small_imagenet_tfrecord/')
IO_DATA_TXTX_OSS = os.path.join(BASE_OSS_PATH, 'data/io_test_dir/txts/')
-
+IO_DATA_MULTI_DIRS_OSS = os.path.join(BASE_OSS_PATH,
+ 'data/io_test_dir/multi_dirs/')
DET_DATA_SMALL_COCO_LOCAL = os.path.join(BASE_LOCAL_PATH,
'data/detection/small_coco')
From 29f0e4242703e3c68c1605767099d205559ced89 Mon Sep 17 00:00:00 2001
From: Cathy0908 <30484308+Cathy0908@users.noreply.github.com>
Date: Wed, 14 Sep 2022 12:05:52 +0800
Subject: [PATCH 2/9] fix import error in quantize_utils.py (#180)
---
easycv/toolkit/quantize/quantize_utils.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/easycv/toolkit/quantize/quantize_utils.py b/easycv/toolkit/quantize/quantize_utils.py
index 8759c593..c7ef8aa8 100644
--- a/easycv/toolkit/quantize/quantize_utils.py
+++ b/easycv/toolkit/quantize/quantize_utils.py
@@ -7,8 +7,8 @@ import numpy as np
import torch
from mmcv.parallel import scatter_kwargs
+from easycv.models.detection.detectors.yolox.yolo_head import YOLOXHead
from easycv.models.detection.utils import output_postprocess, postprocess
-from easycv.models.detection.yolox.yolo_head import YOLOXHead
def quantize_config_check(device, backend, model_type=''):
From 0cb91de0cb80845f2a09f6de7c3697f28b51629e Mon Sep 17 00:00:00 2001
From: zzoneee <55594658+zzoneee@users.noreply.github.com>
Date: Wed, 14 Sep 2022 15:24:54 +0800
Subject: [PATCH 3/9] add DeiT III (#171)
1. Add a backbone: DeiT III.
2. Add an optimizer: Lamb.
3. Add a sampler: RASampler.
4. Add an lr update hook: CosineAnnealingWarmupByEpochLrUpdaterHook.
5. Remove the default mixup_cfg from easycv/models/classification/classification.py to keep classification.py clean.
A condensed config sketch wiring these pieces together follows below.
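A condensed sketch of how these pieces are enabled through the config system,
taken from the deitiii_base_patch16_192.py config added in this patch
(dataset/pipeline and paramwise_options settings omitted):

    data = dict(
        imgs_per_gpu=256,
        workers_per_gpu=8,
        use_repeated_augment_sampler=True,  # build_dataloader then uses RASampler
    )
    optimizer = dict(type='Lamb', lr=0.003, weight_decay=0.05, eps=1e-8)
    lr_config = dict(
        policy='CosineAnnealingWarmupByEpoch',  # the new lr update hook
        by_epoch=True,
        min_lr_ratio=0.00001 / 0.003,
        warmup='linear',
        warmup_by_epoch=True,
        warmup_iters=5,
        warmup_ratio=0.000001 / 0.003,
    )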
---
.../imagenet/vit/deitiii_base_patch16_192.py | 143 +++++++++
.../imagenet_deitiii_base_patch16_192_jpg.py | 17 ++
.../imagenet_deitiii_large_patch16_192_jpg.py | 17 ++
.../imagenet_deitiii_small_patch16_224_jpg.py | 86 ++++++
docs/source/model_zoo_cls.md | 3 +
easycv/core/optimizer/__init__.py | 1 +
easycv/core/optimizer/lamb.py | 166 ++++++++++
.../classification/pipelines/auto_augment.py | 36 ++-
easycv/datasets/loader/build_loader.py | 14 +-
easycv/datasets/loader/sampler.py | 71 +++++
easycv/hooks/__init__.py | 6 +-
easycv/hooks/lr_update_hook.py | 27 ++
easycv/models/backbones/__init__.py | 1 +
easycv/models/backbones/conv_mae_vit.py | 2 +-
easycv/models/backbones/vision_transformer.py | 287 ++++++++++++++++++
.../backbones/vit_transfomer_dynamic.py | 190 +-----------
.../models/classification/classification.py | 30 +-
easycv/models/heads/cls_head.py | 6 +-
easycv/models/loss/cross_entropy_loss.py | 53 +++-
tests/models/backbones/test_deitiii.py | 42 +++
tools/train.py | 5 +-
21 files changed, 982 insertions(+), 221 deletions(-)
create mode 100644 configs/classification/imagenet/vit/deitiii_base_patch16_192.py
create mode 100644 configs/classification/imagenet/vit/imagenet_deitiii_base_patch16_192_jpg.py
create mode 100644 configs/classification/imagenet/vit/imagenet_deitiii_large_patch16_192_jpg.py
create mode 100644 configs/classification/imagenet/vit/imagenet_deitiii_small_patch16_224_jpg.py
create mode 100644 easycv/core/optimizer/lamb.py
create mode 100644 easycv/models/backbones/vision_transformer.py
create mode 100644 tests/models/backbones/test_deitiii.py
diff --git a/configs/classification/imagenet/vit/deitiii_base_patch16_192.py b/configs/classification/imagenet/vit/deitiii_base_patch16_192.py
new file mode 100644
index 00000000..46b620f1
--- /dev/null
+++ b/configs/classification/imagenet/vit/deitiii_base_patch16_192.py
@@ -0,0 +1,143 @@
+# from PIL import Image
+
+_base_ = 'configs/base.py'
+
+log_config = dict(
+ interval=10,
+ hooks=[dict(type='TextLoggerHook'),
+ dict(type='TensorboardLoggerHook')])
+
+# model settings
+model = dict(
+ type='Classification',
+ train_preprocess=['mixUp'],
+ pretrained=False,
+ mixup_cfg=dict(
+ mixup_alpha=0.8,
+ cutmix_alpha=1.0,
+ cutmix_minmax=None,
+ prob=1.0,
+ switch_prob=0.5,
+ mode='batch',
+ label_smoothing=0.0,
+ num_classes=1000),
+ backbone=dict(
+ type='VisionTransformer',
+ img_size=[192],
+ num_classes=1000,
+ patch_size=16,
+ embed_dim=768,
+ depth=12,
+ num_heads=12,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_rate=0.,
+ drop_path_rate=0.2,
+ use_layer_scale=True),
+ head=dict(
+ type='ClsHead',
+ loss_config=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=True,
+ loss_weight=1.0,
+ label_ceil=True),
+ with_fc=False,
+ use_num_classes=False))
+
+data_train_list = 'data/imagenet1k/train.txt'
+data_train_root = 'data/imagenet1k/train/'
+data_test_list = 'data/imagenet1k/val.txt'
+data_test_root = 'data/imagenet1k/val/'
+
+dataset_type = 'ClsDataset'
+img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+three_augment_policies = [[
+ dict(type='PILGaussianBlur', prob=1.0, radius_min=0.1, radius_max=2.0),
+], [
+ dict(type='Solarization', threshold=128),
+], [
+ dict(type='Grayscale', num_output_channels=3),
+]]
+train_pipeline = [
+ dict(
+ type='RandomResizedCrop', size=192, scale=(0.08, 1.0),
+ interpolation=3), # interpolation='bicubic'
+ dict(type='RandomHorizontalFlip'),
+ dict(type='MMAutoAugment', policies=three_augment_policies),
+ dict(type='ColorJitter', brightness=0.3, contrast=0.3, saturation=0.3),
+ dict(type='ToTensor'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Collect', keys=['img', 'gt_labels'])
+]
+size = int((256 / 224) * 192)
+test_pipeline = [
+ dict(type='Resize', size=size, interpolation=3),
+ dict(type='CenterCrop', size=192),
+ dict(type='ToTensor'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Collect', keys=['img', 'gt_labels'])
+]
+
+data = dict(
+ imgs_per_gpu=256,
+ workers_per_gpu=8,
+ use_repeated_augment_sampler=True,
+ train=dict(
+ type=dataset_type,
+ data_source=dict(
+ list_file=data_train_list,
+ root=data_train_root,
+ type='ClsSourceImageList'),
+ pipeline=train_pipeline),
+ val=dict(
+ type=dataset_type,
+ data_source=dict(
+ list_file=data_test_list,
+ root=data_test_root,
+ type='ClsSourceImageList'),
+ pipeline=test_pipeline))
+
+eval_config = dict(initial=True, interval=1, gpu_collect=True)
+eval_pipelines = [
+ dict(
+ mode='test',
+ data=data['val'],
+ dist_eval=True,
+ evaluators=[dict(type='ClsEvaluator', topk=(1, 5))],
+ )
+]
+
+# additional hooks
+custom_hooks = []
+
+# optimizer
+optimizer = dict(
+ type='Lamb',
+ lr=0.003,
+ weight_decay=0.05,
+ eps=1e-8,
+ paramwise_options={
+ 'cls_token': dict(weight_decay=0.),
+ 'pos_embed': dict(weight_decay=0.),
+ 'bias': dict(weight_decay=0.),
+ 'norm': dict(weight_decay=0.),
+ 'gamma_1': dict(weight_decay=0.),
+ 'gamma_2': dict(weight_decay=0.),
+ })
+optimizer_config = dict(grad_clip=None, update_interval=1)
+
+lr_config = dict(
+ policy='CosineAnnealingWarmupByEpoch',
+ by_epoch=True,
+ min_lr_ratio=0.00001 / 0.003,
+ warmup='linear',
+ warmup_by_epoch=True,
+ warmup_iters=5,
+ warmup_ratio=0.000001 / 0.003,
+)
+checkpoint_config = dict(interval=10)
+
+# runtime settings
+total_epochs = 800
+
+ema = dict(decay=0.99996)
diff --git a/configs/classification/imagenet/vit/imagenet_deitiii_base_patch16_192_jpg.py b/configs/classification/imagenet/vit/imagenet_deitiii_base_patch16_192_jpg.py
new file mode 100644
index 00000000..5a35f946
--- /dev/null
+++ b/configs/classification/imagenet/vit/imagenet_deitiii_base_patch16_192_jpg.py
@@ -0,0 +1,17 @@
+_base_ = './deitiii_base_patch16_192.py'
+# model settings
+model = dict(
+ type='Classification',
+ backbone=dict(
+ type='VisionTransformer',
+ img_size=[192],
+ num_classes=1000,
+ patch_size=16,
+ embed_dim=768,
+ depth=12,
+ num_heads=12,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_rate=0.,
+ drop_path_rate=0.2,
+ use_layer_scale=True))
diff --git a/configs/classification/imagenet/vit/imagenet_deitiii_large_patch16_192_jpg.py b/configs/classification/imagenet/vit/imagenet_deitiii_large_patch16_192_jpg.py
new file mode 100644
index 00000000..4c82cf9a
--- /dev/null
+++ b/configs/classification/imagenet/vit/imagenet_deitiii_large_patch16_192_jpg.py
@@ -0,0 +1,17 @@
+_base_ = './deitiii_base_patch16_192.py'
+# model settings
+model = dict(
+ type='Classification',
+ backbone=dict(
+ type='VisionTransformer',
+ img_size=[192],
+ num_classes=1000,
+ patch_size=16,
+ embed_dim=1024,
+ depth=24,
+ num_heads=16,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_rate=0.,
+ drop_path_rate=0.45,
+ use_layer_scale=True))
diff --git a/configs/classification/imagenet/vit/imagenet_deitiii_small_patch16_224_jpg.py b/configs/classification/imagenet/vit/imagenet_deitiii_small_patch16_224_jpg.py
new file mode 100644
index 00000000..9ba9cf77
--- /dev/null
+++ b/configs/classification/imagenet/vit/imagenet_deitiii_small_patch16_224_jpg.py
@@ -0,0 +1,86 @@
+_base_ = './deitiii_base_patch16_192.py'
+# model settings
+model = dict(
+ type='Classification',
+ backbone=dict(
+ type='VisionTransformer',
+ img_size=[224],
+ num_classes=1000,
+ patch_size=16,
+ embed_dim=384,
+ depth=12,
+ num_heads=6,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_rate=0.,
+ drop_path_rate=0.05,
+ use_layer_scale=True))
+
+data_train_list = 'data/imagenet1k/train.txt'
+data_train_root = 'data/imagenet1k/train/'
+data_test_list = 'data/imagenet1k/val.txt'
+data_test_root = 'data/imagenet1k/val/'
+
+dataset_type = 'ClsDataset'
+img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+three_augment_policies = [[
+ dict(type='PILGaussianBlur', prob=1.0, radius_min=0.1, radius_max=2.0),
+], [
+ dict(type='Solarization', threshold=128),
+], [
+ dict(type='Grayscale', num_output_channels=3),
+]]
+train_pipeline = [
+ dict(
+ type='RandomResizedCrop', size=224, scale=(0.08, 1.0),
+ interpolation=3), # interpolation='bicubic'
+ dict(type='RandomHorizontalFlip'),
+ dict(type='MMAutoAugment', policies=three_augment_policies),
+ dict(type='ColorJitter', brightness=0.3, contrast=0.3, saturation=0.3),
+ dict(type='ToTensor'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Collect', keys=['img', 'gt_labels'])
+]
+test_pipeline = [
+ dict(type='Resize', size=256, interpolation=3),
+ dict(type='CenterCrop', size=224),
+ dict(type='ToTensor'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Collect', keys=['img', 'gt_labels'])
+]
+
+data = dict(
+ imgs_per_gpu=256,
+ workers_per_gpu=8,
+ use_repeated_augment_sampler=True,
+ train=dict(
+ type=dataset_type,
+ data_source=dict(
+ list_file=data_train_list,
+ root=data_train_root,
+ type='ClsSourceImageList'),
+ pipeline=train_pipeline),
+ val=dict(
+ type=dataset_type,
+ data_source=dict(
+ list_file=data_test_list,
+ root=data_test_root,
+ type='ClsSourceImageList'),
+ pipeline=test_pipeline))
+
+eval_pipelines = [
+ dict(
+ mode='test',
+ data=data['val'],
+ dist_eval=True,
+ evaluators=[dict(type='ClsEvaluator', topk=(1, 5))],
+ )
+]
+
+# optimizer
+optimizer = dict(lr=0.004)
+
+lr_config = dict(
+ min_lr_ratio=0.00001 / 0.004,
+ warmup_ratio=0.000001 / 0.004,
+)
diff --git a/docs/source/model_zoo_cls.md b/docs/source/model_zoo_cls.md
index 3d91275e..a2254ddf 100644
--- a/docs/source/model_zoo_cls.md
+++ b/docs/source/model_zoo_cls.md
@@ -21,6 +21,9 @@
| hrnetw64 | [hrnetw64](https://github.com/alibaba/EasyCV/tree/master/configs/classification/imagenet/hrnet/imagenet_hrnetw64_jpg.py) | 79.884 | 95.04 | 5120 | 54.74 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/classification/resnet/hrnetw64/epoch_100.pth) |
| vit-base-patch16 | [vit-base-patch16](https://github.com/alibaba/EasyCV/tree/master/configs/classification/imagenet/vit/imagenet_vit_base_patch16_224_jpg.py) | 76.082 | 92.026 | 346 | 8.03 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/classification/vit/vit-base-patch16/epoch_300.pth) |
| swin-tiny-patch4-window7 | [swin-tiny-patch4-window7](https://github.com/alibaba/EasyCV/tree/master/configs/classification/imagenet/swint/imagenet_swin_tiny_patch4_window7_224_jpg.py) | 80.528 | 94.822 | 132 | 12.94 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/classification/swint/swin-tiny-patch4-window7/epoch_300.pth) |
+| deitiii-small-patch16-224 | [deitiii-small-patch16-224](https://github.com/alibaba/EasyCV/tree/master/configs/classification/imagenet/vit/imagenet_deitiii_small_patch16_224_jpg.py) | 81.408 | 95.388 | 89 | 4.53 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/classification/deitiii/imagenet_deitiii_small_patch16_224/deitiii_small.pth) |
+| deitiii-base-patch16-192 | [deitiii-base-patch16-192](https://github.com/alibaba/EasyCV/tree/master/configs/classification/imagenet/vit/imagenet_deitiii_base_patch16_192_jpg.py) | 82.982 | 95.95 | 337 | 4.63 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/classification/deitiii/imagenet_deitiii_base_patch16_192/deitiii_base.pth) |
+| deitiii-large-patch16-192 | [deitiii-large-patch16-192](https://github.com/alibaba/EasyCV/tree/master/configs/classification/imagenet/vit/imagenet_deitiii_large_patch16_192_jpg.py) | 83.902 | 96.296 | 1170 | 10.17 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/classification/deitiii/imagenet_deitiii_large_patch16_192/deitiii_large.pth) |
(ps: the results are obtained with models trained by EasyCV; the default inference input size is 224 and the default machine is a V100 16G; the gpu memory column records the GPU peak memory)
diff --git a/easycv/core/optimizer/__init__.py b/easycv/core/optimizer/__init__.py
index b4df330b..4c2bf30d 100644
--- a/easycv/core/optimizer/__init__.py
+++ b/easycv/core/optimizer/__init__.py
@@ -4,6 +4,7 @@ import torch
from torch.optim import *
from .builder import build_optimizer_constructor
+from .lamb import Lamb
from .lars import LARS
from .layer_decay_optimizer_constructor import LayerDecayOptimizerConstructor
from .ranger import Ranger
diff --git a/easycv/core/optimizer/lamb.py b/easycv/core/optimizer/lamb.py
new file mode 100644
index 00000000..6295cdc7
--- /dev/null
+++ b/easycv/core/optimizer/lamb.py
@@ -0,0 +1,166 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import math
+
+import torch
+from mmcv.runner import OPTIMIZERS
+from torch.optim import Optimizer
+
+
+@OPTIMIZERS.register_module()
+class Lamb(Optimizer):
+ """A pure pytorch variant of FuseLAMB (NvLamb variant) optimizer.
+ This class is copied from `timm`_. The LAMB was proposed in `Large Batch
+ Optimization for Deep Learning - Training BERT in 76 minutes`_.
+ .. _timm:
+ https://github.com/rwightman/pytorch-image-models/blob/master/timm/optim/lamb.py
+ .. _Large Batch Optimization for Deep Learning - Training BERT in 76 minutes:
+ https://arxiv.org/abs/1904.00962
+ Arguments:
+ params (iterable): iterable of parameters to optimize or dicts defining
+ parameter groups.
+ lr (float, optional): learning rate. (default: 1e-3)
+ betas (Tuple[float, float], optional): coefficients used for computing
+ running averages of gradient and its norm. (default: (0.9, 0.999))
+ eps (float, optional): term added to the denominator to improve
+ numerical stability. (default: 1e-8)
+ weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
+ grad_averaging (bool, optional): whether apply (1-beta2) to grad when
+ calculating running averages of gradient. (default: True)
+ max_grad_norm (float, optional): value used to clip global grad norm
+ (default: 1.0)
+ trust_clip (bool): enable LAMBC trust ratio clipping (default: False)
+ always_adapt (boolean, optional): Apply adaptive learning rate to 0.0
+ weight decay parameter (default: False)
+ """ # noqa: E501
+
+ def __init__(self,
+ params,
+ lr=1e-3,
+ bias_correction=True,
+ betas=(0.9, 0.999),
+ eps=1e-6,
+ weight_decay=0.01,
+ grad_averaging=True,
+ max_grad_norm=1.0,
+ trust_clip=False,
+ always_adapt=False):
+ defaults = dict(
+ lr=lr,
+ bias_correction=bias_correction,
+ betas=betas,
+ eps=eps,
+ weight_decay=weight_decay,
+ grad_averaging=grad_averaging,
+ max_grad_norm=max_grad_norm,
+ trust_clip=trust_clip,
+ always_adapt=always_adapt)
+ super().__init__(params, defaults)
+
+ @torch.no_grad()
+ def step(self, closure=None):
+ """Performs a single optimization step.
+ Arguments:
+ closure (callable, optional): A closure that reevaluates the model
+ and returns the loss.
+ """
+ loss = None
+ if closure is not None:
+ with torch.enable_grad():
+ loss = closure()
+
+ device = self.param_groups[0]['params'][0].device
+ one_tensor = torch.tensor(
+ 1.0, device=device
+ ) # because torch.where doesn't handle scalars correctly
+ global_grad_norm = torch.zeros(1, device=device)
+ for group in self.param_groups:
+ for p in group['params']:
+ if p.grad is None:
+ continue
+ grad = p.grad
+ if grad.is_sparse:
+ raise RuntimeError(
+ 'Lamb does not support sparse gradients, consider '
+ 'SparseAdam instead.')
+ global_grad_norm.add_(grad.pow(2).sum())
+
+ global_grad_norm = torch.sqrt(global_grad_norm)
+ # FIXME it'd be nice to remove explicit tensor conversion of scalars
+ # when torch.where promotes
+ # scalar types properly https://github.com/pytorch/pytorch/issues/9190
+ max_grad_norm = torch.tensor(
+ self.defaults['max_grad_norm'], device=device)
+ clip_global_grad_norm = torch.where(global_grad_norm > max_grad_norm,
+ global_grad_norm / max_grad_norm,
+ one_tensor)
+
+ for group in self.param_groups:
+ bias_correction = 1 if group['bias_correction'] else 0
+ beta1, beta2 = group['betas']
+ grad_averaging = 1 if group['grad_averaging'] else 0
+ beta3 = 1 - beta1 if grad_averaging else 1.0
+
+ # assume same step across group now to simplify things
+            # per parameter step can be easily supported by making it a tensor, or
+ # pass list into kernel
+ if 'step' in group:
+ group['step'] += 1
+ else:
+ group['step'] = 1
+
+ if bias_correction:
+ bias_correction1 = 1 - beta1**group['step']
+ bias_correction2 = 1 - beta2**group['step']
+ else:
+ bias_correction1, bias_correction2 = 1.0, 1.0
+
+ for p in group['params']:
+ if p.grad is None:
+ continue
+ grad = p.grad.div_(clip_global_grad_norm)
+ state = self.state[p]
+
+ # State initialization
+ if len(state) == 0:
+                    # Exponential moving average of gradient values
+ state['exp_avg'] = torch.zeros_like(p)
+ # Exponential moving average of squared gradient values
+ state['exp_avg_sq'] = torch.zeros_like(p)
+
+ exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
+
+ # Decay the first and second moment running average coefficient
+ exp_avg.mul_(beta1).add_(grad, alpha=beta3) # m_t
+ exp_avg_sq.mul_(beta2).addcmul_(
+ grad, grad, value=1 - beta2) # v_t
+
+ denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(
+ group['eps'])
+ update = (exp_avg / bias_correction1).div_(denom)
+
+ weight_decay = group['weight_decay']
+ if weight_decay != 0:
+ update.add_(p, alpha=weight_decay)
+
+ if weight_decay != 0 or group['always_adapt']:
+ # Layer-wise LR adaptation. By default, skip adaptation on
+ # parameters that are
+ # excluded from weight decay, unless always_adapt == True,
+ # then always enabled.
+ w_norm = p.norm(2.0)
+ g_norm = update.norm(2.0)
+ # FIXME nested where required since logical and/or not
+ # working in PT XLA
+ trust_ratio = torch.where(
+ w_norm > 0,
+ torch.where(g_norm > 0, w_norm / g_norm, one_tensor),
+ one_tensor,
+ )
+ if group['trust_clip']:
+ # LAMBC trust clipping, upper bound fixed at one
+ trust_ratio = torch.minimum(trust_ratio, one_tensor)
+ update.mul_(trust_ratio)
+
+ p.add_(update, alpha=-group['lr'])
+
+ return loss
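Note: a minimal sketch of using the new Lamb optimizer directly, outside the
config system (the toy model is only for illustration):

    import torch
    from easycv.core.optimizer import Lamb

    model = torch.nn.Linear(16, 4)
    optimizer = Lamb(model.parameters(), lr=3e-3, weight_decay=0.05, eps=1e-8)

    loss = model(torch.randn(8, 16)).sum()
    loss.backward()
    optimizer.step()       # applies the LAMB trust-ratio scaled update
    optimizer.zero_grad()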
diff --git a/easycv/datasets/classification/pipelines/auto_augment.py b/easycv/datasets/classification/pipelines/auto_augment.py
index 9f4137e6..e9bef83a 100644
--- a/easycv/datasets/classification/pipelines/auto_augment.py
+++ b/easycv/datasets/classification/pipelines/auto_augment.py
@@ -8,7 +8,7 @@ from typing import Sequence
import mmcv
import numpy as np
-from PIL import Image
+from PIL import Image, ImageFilter
from easycv.datasets.registry import PIPELINES
from easycv.datasets.shared.pipelines import Compose
@@ -1043,3 +1043,37 @@ class Cutout(object):
repr_str += f'pad_val={self.pad_val}, '
repr_str += f'prob={self.prob})'
return repr_str
+
+
+@PIPELINES.register_module()
+class PILGaussianBlur(object):
+
+ def __init__(self, prob=0.1, radius_min=0.1, radius_max=2.):
+ assert 0 <= prob <= 1.0, 'The prob should be in range [0,1], ' \
+ f'got {prob} instead.'
+ assert isinstance(radius_min, (int, float)), 'The radius_min type must '\
+ f'be int or float, but got {type(radius_min)} instead.'
+ assert isinstance(radius_max, (int, float)), 'The radius_max type must '\
+ f'be int or float, but got {type(radius_max)} instead.'
+
+ self.prob = prob
+ self.radius_min = radius_min
+ self.radius_max = radius_max
+
+ def __call__(self, results):
+ if np.random.rand() > self.prob:
+ return results
+
+ for key in results.get('img_fields', ['img']):
+ img = results[key].filter(
+ ImageFilter.GaussianBlur(
+ radius=random.uniform(self.radius_min, self.radius_max)))
+ results[key] = img
+ return results
+
+ def __repr__(self):
+ repr_str = self.__class__.__name__
+ repr_str += f'(prob={self.prob}, '
+ repr_str += f'radius_min={self.radius_min}, '
+ repr_str += f'radius_max={self.radius_max})'
+ return repr_str
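Note: a minimal sketch of the new PILGaussianBlur transform applied to a
pipeline `results` dict holding a PIL image (the dummy image is only for
illustration):

    import numpy as np
    from PIL import Image
    from easycv.datasets.classification.pipelines.auto_augment import PILGaussianBlur

    blur = PILGaussianBlur(prob=1.0, radius_min=0.1, radius_max=2.0)
    results = {'img': Image.fromarray(np.zeros((224, 224, 3), dtype=np.uint8))}
    results = blur(results)  # results['img'] is now a Gaussian-blurred PIL image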
diff --git a/easycv/datasets/loader/build_loader.py b/easycv/datasets/loader/build_loader.py
index 6af50073..08127325 100644
--- a/easycv/datasets/loader/build_loader.py
+++ b/easycv/datasets/loader/build_loader.py
@@ -14,7 +14,7 @@ from easycv.datasets.shared.odps_reader import set_dataloader_workid
from easycv.utils.dist_utils import sync_random_seed
from easycv.utils.torchacc_util import is_torchacc_enabled
from .collate import CollateWrapper
-from .sampler import DistributedMPSampler, DistributedSampler
+from .sampler import DistributedMPSampler, DistributedSampler, RASampler
if platform.system() != 'Windows':
# https://github.com/pytorch/pytorch/issues/973
@@ -35,6 +35,7 @@ def build_dataloader(dataset,
odps_config=None,
persistent_workers=False,
collate_hooks=None,
+ use_repeated_augment_sampler=False,
**kwargs):
"""Build PyTorch DataLoader.
In distributed training, each GPU/process has a dataloader.
@@ -56,6 +57,8 @@ def build_dataloader(dataset,
data in worker process can be reused.
persistent_workers (bool) : After pytorch1.7, could use persistent_workers=True to
avoid reconstruct dataworker before each epoch, speed up before epoch
+        use_repeated_augment_sampler (bool) : If set to True, RASampler is used.
+ Default: False.
kwargs: any keyword argument to be used to initialize DataLoader
Returns:
DataLoader: A PyTorch dataloader.
@@ -68,7 +71,9 @@ def build_dataloader(dataset,
'split_huge_listfile_byrank',
False)
- if hasattr(dataset, 'm_per_class') and dataset.m_per_class > 1:
+ if use_repeated_augment_sampler:
+ sampler = RASampler(dataset, world_size, rank, shuffle=shuffle)
+ elif hasattr(dataset, 'm_per_class') and dataset.m_per_class > 1:
sampler = DistributedMPSampler(
dataset,
world_size,
@@ -88,7 +93,10 @@ def build_dataloader(dataset,
else:
if replace:
raise NotImplementedError
- if hasattr(dataset, 'm_per_class') and dataset.m_per_class > 1:
+
+ if use_repeated_augment_sampler:
+ sampler = RASampler(dataset, 1, 0, shuffle=shuffle)
+ elif hasattr(dataset, 'm_per_class') and dataset.m_per_class > 1:
sampler = DistributedMPSampler(
dataset, 1, 0, shuffle=shuffle, replace=replace)
else:
diff --git a/easycv/datasets/loader/sampler.py b/easycv/datasets/loader/sampler.py
index 6fe6863c..4c22695f 100644
--- a/easycv/datasets/loader/sampler.py
+++ b/easycv/datasets/loader/sampler.py
@@ -6,6 +6,7 @@ import random
import numpy as np
import torch
+import torch.distributed as dist
from mmcv.runner import get_dist_info
from torch.utils.data import DistributedSampler as _DistributedSampler
from torch.utils.data import Sampler
@@ -469,3 +470,73 @@ class DistributedGivenIterationSampler(Sampler):
def set_epoch(self, epoch):
pass
+
+
+class RASampler(torch.utils.data.Sampler):
+    """Sampler that restricts data loading to a subset of the dataset for
+    distributed training, with repeated augmentation.
+    It ensures that each augmented version of a sample is visible to a
+    different process (GPU).
+    Heavily based on torch.utils.data.DistributedSampler.
+    """
+
+ def __init__(self,
+ dataset,
+ num_replicas=None,
+ rank=None,
+ shuffle=True,
+ num_repeats: int = 3):
+ if num_replicas is None:
+ if not dist.is_available():
+ raise RuntimeError(
+ 'Requires distributed package to be available')
+ num_replicas = dist.get_world_size()
+ if rank is None:
+ if not dist.is_available():
+ raise RuntimeError(
+ 'Requires distributed package to be available')
+ rank = dist.get_rank()
+ if num_repeats < 1:
+ raise ValueError('num_repeats should be greater than 0')
+ self.dataset = dataset
+ self.num_replicas = num_replicas
+ self.rank = rank
+ self.num_repeats = num_repeats
+ self.epoch = 0
+ self.num_samples = int(
+ math.ceil(
+ len(self.dataset) * self.num_repeats / self.num_replicas))
+ self.total_size = self.num_samples * self.num_replicas
+ # self.num_selected_samples = int(math.ceil(len(self.dataset) / self.num_replicas))
+ self.num_selected_samples = int(
+ math.floor(len(self.dataset) // 256 * 256 / self.num_replicas))
+ self.shuffle = shuffle
+
+ def __iter__(self):
+ if self.shuffle:
+ # deterministically shuffle based on epoch
+ g = torch.Generator()
+ g.manual_seed(self.epoch)
+ indices = torch.randperm(len(self.dataset), generator=g)
+ else:
+ indices = torch.arange(start=0, end=len(self.dataset))
+
+ # add extra samples to make it evenly divisible
+ indices = torch.repeat_interleave(
+ indices, repeats=self.num_repeats, dim=0).tolist()
+ padding_size: int = self.total_size - len(indices)
+ if padding_size > 0:
+ indices += indices[:padding_size]
+ assert len(indices) == self.total_size
+
+ # subsample
+ indices = indices[self.rank:self.total_size:self.num_replicas]
+ assert len(indices) == self.num_samples
+
+ return iter(indices[:self.num_selected_samples])
+
+ def __len__(self):
+ return self.num_selected_samples
+
+ def set_epoch(self, epoch):
+ self.epoch = epoch
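Note: a minimal sketch of RASampler in a single-process setting (world size 1,
rank 0), mirroring how build_dataloader constructs it when
use_repeated_augment_sampler=True:

    import torch
    from easycv.datasets.loader.sampler import RASampler

    dataset = torch.utils.data.TensorDataset(torch.arange(512))
    sampler = RASampler(dataset, num_replicas=1, rank=0, shuffle=False, num_repeats=3)
    sampler.set_epoch(0)
    loader = torch.utils.data.DataLoader(dataset, sampler=sampler, batch_size=2)
    # With shuffle=False the emitted indices show each sample repeated
    # num_repeats times: [0, 0, 0, 1, 1, 1, ...]
    print(list(iter(sampler))[:9])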
diff --git a/easycv/hooks/__init__.py b/easycv/hooks/__init__.py
index c292038f..f2814dac 100644
--- a/easycv/hooks/__init__.py
+++ b/easycv/hooks/__init__.py
@@ -13,7 +13,8 @@ from .eval_hook import DistEvalHook, EvalHook
from .export_hook import ExportHook
from .extractor import Extractor
from .logger import PreLoggerHook
-from .lr_update_hook import StepFixCosineAnnealingLrUpdaterHook
+from .lr_update_hook import (CosineAnnealingWarmupByEpochLrUpdaterHook,
+ StepFixCosineAnnealingLrUpdaterHook)
from .optimizer_hook import OptimizerHook
from .oss_sync_hook import OSSSyncHook
from .registry import HOOKS
@@ -33,7 +34,8 @@ __all__ = [
'OSSSyncHook', 'HOOKS', 'TIMEHook', 'SWAVHook', 'SyncNormHook',
'SyncRandomSizeHook', 'TensorboardLoggerHookV2', 'WandbLoggerHookV2',
'YOLOXLrUpdaterHook', 'YOLOXModeSwitchHook', 'MixupCollateHook',
- 'PreLoggerHook', 'StepFixCosineAnnealingLrUpdaterHook', 'ThroughputHook'
+ 'PreLoggerHook', 'StepFixCosineAnnealingLrUpdaterHook',
+ 'CosineAnnealingWarmupByEpochLrUpdaterHook', 'ThroughputHook'
]
if LooseVersion(torch.__version__) >= LooseVersion('1.6.0'):
diff --git a/easycv/hooks/lr_update_hook.py b/easycv/hooks/lr_update_hook.py
index 39ca8f53..af1bc514 100644
--- a/easycv/hooks/lr_update_hook.py
+++ b/easycv/hooks/lr_update_hook.py
@@ -1,4 +1,5 @@
# Copyright (c) OpenMMLab. All rights reserved.
+from mmcv import runner
from mmcv.runner import HOOKS
from mmcv.runner.hooks.lr_updater import (CosineAnnealingLrUpdaterHook,
annealing_cos)
@@ -54,3 +55,29 @@ class StepFixCosineAnnealingLrUpdaterHook(CosineAnnealingLrUpdaterHook):
target_lr = self.min_lr
return annealing_cos(base_lr, target_lr, progress / max_progress)
+
+
+@HOOKS.register_module()
+class CosineAnnealingWarmupByEpochLrUpdaterHook(CosineAnnealingLrUpdaterHook):
+
+ def before_train_iter(self, runner: 'runner.BaseRunner'):
+ cur_iter = runner.iter
+ epoch_len = len(runner.data_loader)
+ assert isinstance(self.warmup_iters, int)
+ if not self.by_epoch:
+ self.regular_lr = self.get_regular_lr(runner)
+ if self.warmup is None or cur_iter >= self.warmup_iters:
+ self._set_lr(runner, self.regular_lr)
+ else:
+ if cur_iter % epoch_len == 0:
+ warmup_lr = self.get_warmup_lr(cur_iter)
+ self._set_lr(runner, warmup_lr)
+ elif self.by_epoch:
+ if self.warmup is None or cur_iter > self.warmup_iters:
+ return
+ elif cur_iter == self.warmup_iters:
+ self._set_lr(runner, self.regular_lr)
+ else:
+ if cur_iter % epoch_len == 0:
+ warmup_lr = self.get_warmup_lr(cur_iter)
+ self._set_lr(runner, warmup_lr)
diff --git a/easycv/models/backbones/__init__.py b/easycv/models/backbones/__init__.py
index f0be50ae..add29a07 100644
--- a/easycv/models/backbones/__init__.py
+++ b/easycv/models/backbones/__init__.py
@@ -19,4 +19,5 @@ from .resnet_jit import ResNetJIT
from .resnext import ResNeXt
from .shuffle_transformer import ShuffleTransformer
from .swin_transformer import SwinTransformer
+from .vision_transformer import VisionTransformer
from .vitdet import ViTDet
diff --git a/easycv/models/backbones/conv_mae_vit.py b/easycv/models/backbones/conv_mae_vit.py
index 02755faf..ce9b7b61 100644
--- a/easycv/models/backbones/conv_mae_vit.py
+++ b/easycv/models/backbones/conv_mae_vit.py
@@ -10,7 +10,7 @@ from timm.models.layers import trunc_normal_
from easycv.models.registry import BACKBONES
from easycv.models.utils import DropPath
from easycv.models.utils.pos_embed import get_2d_sincos_pos_embed
-from .vit_transfomer_dynamic import Block
+from .vision_transformer import Block
class PatchEmbed(nn.Module):
diff --git a/easycv/models/backbones/vision_transformer.py b/easycv/models/backbones/vision_transformer.py
new file mode 100644
index 00000000..2061979d
--- /dev/null
+++ b/easycv/models/backbones/vision_transformer.py
@@ -0,0 +1,287 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+"""
+Mostly copy-paste from timm library.
+https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
+
+"""
+import math
+from functools import partial
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from timm.models.layers import trunc_normal_
+
+from easycv.models.utils import DropPath, Mlp
+from ..registry import BACKBONES
+
+
+class Attention(nn.Module):
+
+ def __init__(self,
+ dim,
+ num_heads=8,
+ qkv_bias=False,
+ qk_scale=None,
+ attn_drop=0.,
+ proj_drop=0.):
+ super().__init__()
+ self.num_heads = num_heads
+ head_dim = dim // num_heads
+ self.scale = qk_scale or head_dim**-0.5
+
+ self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
+ self.attn_drop = nn.Dropout(attn_drop)
+ self.proj = nn.Linear(dim, dim)
+ self.proj_drop = nn.Dropout(proj_drop)
+
+ def forward(self, x, rel_pos_bias=None):
+ B, N, C = x.shape
+ qkv = self.qkv(x).reshape(B, N, 3, self.num_heads,
+ C // self.num_heads).permute(2, 0, 3, 1, 4)
+ q, k, v = qkv[0], qkv[1], qkv[2]
+
+ attn = (q @ k.transpose(-2, -1)) * self.scale
+
+ if rel_pos_bias is not None:
+ attn = attn + rel_pos_bias
+
+ attn = attn.softmax(dim=-1)
+ attn = self.attn_drop(attn)
+
+ x = (attn @ v).transpose(1, 2).reshape(B, N, C)
+ x = self.proj(x)
+ x = self.proj_drop(x)
+ return x, attn
+
+
+class Block(nn.Module):
+
+ def __init__(self,
+ dim,
+ num_heads,
+ mlp_ratio=4.,
+ qkv_bias=False,
+ qk_scale=None,
+ drop=0.,
+ attn_drop=0.,
+ drop_path=0.,
+ act_layer=nn.GELU,
+ norm_layer=nn.LayerNorm,
+ use_layer_scale=False,
+ init_values=1e-4):
+ super().__init__()
+ self.norm1 = norm_layer(dim)
+ self.attn = Attention(
+ dim,
+ num_heads=num_heads,
+ qkv_bias=qkv_bias,
+ qk_scale=qk_scale,
+ attn_drop=attn_drop,
+ proj_drop=drop)
+ self.drop_path = DropPath(
+ drop_path) if drop_path > 0. else nn.Identity()
+ self.norm2 = norm_layer(dim)
+ mlp_hidden_dim = int(dim * mlp_ratio)
+ self.mlp = Mlp(
+ in_features=dim,
+ hidden_features=mlp_hidden_dim,
+ act_layer=act_layer,
+ drop=drop)
+ self.use_layer_scale = use_layer_scale
+ if self.use_layer_scale:
+ self.gamma_1 = nn.Parameter(
+ init_values * torch.ones((dim)), requires_grad=True)
+ self.gamma_2 = nn.Parameter(
+ init_values * torch.ones((dim)), requires_grad=True)
+
+ def forward(self, x, return_attention=False, rel_pos_bias=None):
+ y, attn = self.attn(self.norm1(x), rel_pos_bias=rel_pos_bias)
+ if return_attention:
+ return attn
+ if self.use_layer_scale:
+ x = x + self.drop_path(self.gamma_1 * y)
+ x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x)))
+ else:
+ x = x + self.drop_path(y)
+ x = x + self.drop_path(self.mlp(self.norm2(x)))
+ return x
+
+ def forward_fea_and_attn(self, x):
+ y, attn = self.attn(self.norm1(x))
+ if self.use_layer_scale:
+ x = x + self.drop_path(self.gamma_1 * y)
+ x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x)))
+ else:
+ x = x + self.drop_path(y)
+ x = x + self.drop_path(self.mlp(self.norm2(x)))
+ return x, attn
+
+
+class PatchEmbed(nn.Module):
+ """ Image to Patch Embedding
+ """
+
+ def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
+ super().__init__()
+ num_patches = (img_size // patch_size) * (img_size // patch_size)
+ self.img_size = img_size
+ self.patch_size = patch_size
+ self.num_patches = num_patches
+
+ self.proj = nn.Conv2d(
+ in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
+
+ def forward(self, x):
+ B, C, H, W = x.shape
+ x = self.proj(x).flatten(2).transpose(1, 2)
+ return x
+
+
+@BACKBONES.register_module
+class VisionTransformer(nn.Module):
+    """ DeiT III is based on ViT. It uses several strategies to improve the ViT
+    model, such as layer scale, stochastic depth and 3-Augment.
+
+ Paper link: https://arxiv.org/pdf/2204.07118.pdf (DeiT III: Revenge of the ViT)
+
+ Args:
+ img_size (list): Input image size. img_size=[224] means the image size is
+ 224*224. img_size=[192, 224] means the image size is 192*224.
+ patch_size (int): The patch size. Default: 16
+ in_chans (int): The num of input channels. Default: 3
+ num_classes (int): The num of picture classes. Default: 1000
+ embed_dim (int): The dimensions of embedding. Default: 768
+ depth (int): The num of blocks. Default: 12
+ num_heads (int): Parallel attention heads. Default: 12
+ mlp_ratio (float): Mlp expansion ratio. Default: 4.0
+        qkv_bias (bool): Whether to add a learnable bias to the qkv projection. Default: False
+ qk_scale (float | None): In the step of self-attention, if qk_scale is not
+ None, it will use qk_scale to scale the q @ k. Otherwise it will use
+ head_dim**-0.5 instead of qk_scale. Default: None
+ drop_rate (float): Probability of an element to be zeroed after the feed
+ forward layer. Default: 0.0
+ drop_path_rate (float): Stochastic depth rate. Default: 0
+ norm_layer (nn.Module): normalization layer
+        use_dense_prediction (bool): If True, the global pool and the norm
+            before the head are removed (if any). Default: False
+ global_pool (bool): Global pool before head. Default: False
+ use_layer_scale (bool): If use_layer_scale is True, it will use layer
+ scale. Default: False
+ init_scale (float): It is used for layer scale in Block to scale the
+ gamma_1 and gamma_2.
+
+ """
+
+ def __init__(self,
+ img_size=[224],
+ patch_size=16,
+ in_chans=3,
+ num_classes=1000,
+ embed_dim=768,
+ depth=12,
+ num_heads=12,
+ mlp_ratio=4.,
+ qkv_bias=False,
+ qk_scale=None,
+ drop_rate=0.,
+ attn_drop_rate=0.,
+ drop_path_rate=0.,
+ norm_layer=partial(nn.LayerNorm, eps=1e-6),
+ use_dense_prediction=False,
+ global_pool=False,
+ use_layer_scale=False,
+ init_scale=1e-4,
+ **kwargs):
+ super().__init__()
+
+ self.num_features = self.embed_dim = embed_dim
+
+ self.patch_embed = PatchEmbed(
+ img_size=img_size[0],
+ patch_size=patch_size,
+ in_chans=in_chans,
+ embed_dim=embed_dim)
+ num_patches = self.patch_embed.num_patches
+
+ self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
+ self.pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim))
+ self.pos_drop = nn.Dropout(p=drop_rate)
+
+ self.drop_path_rate = drop_path_rate
+ self.depth = depth
+ dpr = [drop_path_rate for i in range(depth)]
+ self.blocks = nn.ModuleList([
+ Block(
+ dim=embed_dim,
+ num_heads=num_heads,
+ mlp_ratio=mlp_ratio,
+ qkv_bias=qkv_bias,
+ qk_scale=qk_scale,
+ drop=drop_rate,
+ attn_drop=attn_drop_rate,
+ drop_path=dpr[i],
+ norm_layer=norm_layer,
+ use_layer_scale=use_layer_scale,
+ init_values=init_scale) for i in range(depth)
+ ])
+ self.norm = norm_layer(embed_dim)
+
+ # Classifier head
+ self.head = nn.Linear(
+ embed_dim, num_classes) if num_classes > 0 else nn.Identity()
+
+ # Dense prediction head
+ self.use_dense_prediction = use_dense_prediction
+ if self.use_dense_prediction:
+ self.head_dense = None
+
+ # Use global average pooling
+ self.global_pool = global_pool
+ if self.global_pool:
+ self.fc_norm = norm_layer(embed_dim)
+ self.norm = None
+
+ def init_weights(self):
+ trunc_normal_(self.pos_embed, std=.02)
+ trunc_normal_(self.cls_token, std=.02)
+
+ for m in self.modules():
+ if isinstance(m, nn.Linear):
+ trunc_normal_(m.weight, std=.02)
+ if isinstance(m, nn.Linear) and m.bias is not None:
+ nn.init.constant_(m.bias, 0)
+ elif isinstance(m, nn.LayerNorm):
+ nn.init.constant_(m.bias, 0)
+ nn.init.constant_(m.weight, 1.0)
+
+ def forward(self, x):
+
+ x = self.forward_features(x)
+ x = self.pos_drop(x)
+ x = self.head(x)
+
+ return [x]
+
+ def forward_features(self, x):
+ B = x.shape[0]
+ x = self.patch_embed(x)
+
+ cls_tokens = self.cls_token.expand(B, -1, -1)
+
+ x = x + self.pos_embed
+ x = torch.cat((cls_tokens, x), dim=1)
+
+ for blk in self.blocks:
+ x = blk(x)
+ if self.norm is not None:
+ x = self.norm(x)
+
+ if self.use_dense_prediction:
+ return x[:, 0], x[:, 1:]
+ else:
+ if self.global_pool:
+ x = x[:, 1:, :].mean(dim=1)
+ return self.fc_norm(x)
+ else:
+ return x[:, 0]
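Note: a minimal sketch of running the new VisionTransformer backbone (DeiT III
variant) on a dummy batch, with the shape settings of deitiii_base_patch16_192:

    import torch
    from easycv.models.backbones.vision_transformer import VisionTransformer

    model = VisionTransformer(
        img_size=[192], patch_size=16, embed_dim=768, depth=12, num_heads=12,
        mlp_ratio=4, qkv_bias=True, num_classes=1000,
        drop_path_rate=0.2, use_layer_scale=True)
    model.init_weights()
    model.eval()
    with torch.no_grad():
        out = model(torch.randn(1, 3, 192, 192))
    print(out[0].shape)  # torch.Size([1, 1000]); forward returns a list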
diff --git a/easycv/models/backbones/vit_transfomer_dynamic.py b/easycv/models/backbones/vit_transfomer_dynamic.py
index 3ff94701..6df88d2c 100644
--- a/easycv/models/backbones/vit_transfomer_dynamic.py
+++ b/easycv/models/backbones/vit_transfomer_dynamic.py
@@ -12,197 +12,25 @@ from functools import partial
import torch
import torch.nn as nn
-from timm.models.layers import trunc_normal_
-from easycv.models.utils import DropPath, Mlp
+from easycv.models.backbones.vision_transformer import VisionTransformer
-class Attention(nn.Module):
-
- def __init__(self,
- dim,
- num_heads=8,
- qkv_bias=False,
- qk_scale=None,
- attn_drop=0.,
- proj_drop=0.):
- super().__init__()
- self.num_heads = num_heads
- head_dim = dim // num_heads
- self.scale = qk_scale or head_dim**-0.5
-
- self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
- self.attn_drop = nn.Dropout(attn_drop)
- self.proj = nn.Linear(dim, dim)
- self.proj_drop = nn.Dropout(proj_drop)
-
- def forward(self, x, rel_pos_bias=None):
- B, N, C = x.shape
- qkv = self.qkv(x).reshape(B, N, 3, self.num_heads,
- C // self.num_heads).permute(2, 0, 3, 1, 4)
- q, k, v = qkv[0], qkv[1], qkv[2]
-
- attn = (q @ k.transpose(-2, -1)) * self.scale
-
- if rel_pos_bias is not None:
- attn = attn + rel_pos_bias
-
- attn = attn.softmax(dim=-1)
- attn = self.attn_drop(attn)
-
- x = (attn @ v).transpose(1, 2).reshape(B, N, C)
- x = self.proj(x)
- x = self.proj_drop(x)
- return x, attn
-
-
-class Block(nn.Module):
-
- def __init__(self,
- dim,
- num_heads,
- mlp_ratio=4.,
- qkv_bias=False,
- qk_scale=None,
- drop=0.,
- attn_drop=0.,
- drop_path=0.,
- act_layer=nn.GELU,
- norm_layer=nn.LayerNorm):
- super().__init__()
- self.norm1 = norm_layer(dim)
- self.attn = Attention(
- dim,
- num_heads=num_heads,
- qkv_bias=qkv_bias,
- qk_scale=qk_scale,
- attn_drop=attn_drop,
- proj_drop=drop)
- self.drop_path = DropPath(
- drop_path) if drop_path > 0. else nn.Identity()
- self.norm2 = norm_layer(dim)
- mlp_hidden_dim = int(dim * mlp_ratio)
- self.mlp = Mlp(
- in_features=dim,
- hidden_features=mlp_hidden_dim,
- act_layer=act_layer,
- drop=drop)
-
- def forward(self, x, return_attention=False, rel_pos_bias=None):
- y, attn = self.attn(self.norm1(x), rel_pos_bias=rel_pos_bias)
- if return_attention:
- return attn
- x = x + self.drop_path(y)
- x = x + self.drop_path(self.mlp(self.norm2(x)))
- return x
-
- def forward_fea_and_attn(self, x):
- y, attn = self.attn(self.norm1(x))
- x = x + self.drop_path(y)
- x = x + self.drop_path(self.mlp(self.norm2(x)))
- return x, attn
-
-
-class PatchEmbed(nn.Module):
- """ Image to Patch Embedding
- """
-
- def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
- super().__init__()
- num_patches = (img_size // patch_size) * (img_size // patch_size)
- self.img_size = img_size
- self.patch_size = patch_size
- self.num_patches = num_patches
-
- self.proj = nn.Conv2d(
- in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
-
- def forward(self, x):
- B, C, H, W = x.shape
- x = self.proj(x).flatten(2).transpose(1, 2)
- return x
-
-
-class DynamicVisionTransformer(nn.Module):
+class DynamicVisionTransformer(VisionTransformer):
"""Dynamic Vision Transformer """
- def __init__(self,
- img_size=[224],
- patch_size=16,
- in_chans=3,
- num_classes=0,
- embed_dim=768,
- depth=12,
- num_heads=12,
- mlp_ratio=4.,
- qkv_bias=False,
- qk_scale=None,
- drop_rate=0.,
- attn_drop_rate=0.,
- drop_path_rate=0.,
- norm_layer=nn.LayerNorm,
- use_dense_prediction=False,
- global_pool=False,
- **kwargs):
- super().__init__()
- self.num_features = self.embed_dim = embed_dim
+ def __init__(self, **kwargs):
+ super(DynamicVisionTransformer, self).__init__(**kwargs)
- self.patch_embed = PatchEmbed(
- img_size=img_size[0],
- patch_size=patch_size,
- in_chans=in_chans,
- embed_dim=embed_dim)
num_patches = self.patch_embed.num_patches
- self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
self.pos_embed = nn.Parameter(
- torch.zeros(1, num_patches + 1, embed_dim))
- self.pos_drop = nn.Dropout(p=drop_rate)
+ torch.zeros(1, num_patches + 1, self.embed_dim))
- dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)
- ] # stochastic depth decay rule
- self.blocks = nn.ModuleList([
- Block(
- dim=embed_dim,
- num_heads=num_heads,
- mlp_ratio=mlp_ratio,
- qkv_bias=qkv_bias,
- qk_scale=qk_scale,
- drop=drop_rate,
- attn_drop=attn_drop_rate,
- drop_path=dpr[i],
- norm_layer=norm_layer) for i in range(depth)
- ])
- self.norm = norm_layer(embed_dim)
-
- # Classifier head
- self.head = nn.Linear(
- embed_dim, num_classes) if num_classes > 0 else nn.Identity()
-
- # Dense prediction head
- self.use_dense_prediction = use_dense_prediction
- if self.use_dense_prediction:
- self.head_dense = None
-
-
-# Use global average pooling
- self.global_pool = global_pool
- if self.global_pool:
- self.fc_norm = norm_layer(embed_dim)
- self.norm = None
-
- trunc_normal_(self.pos_embed, std=.02)
- trunc_normal_(self.cls_token, std=.02)
-
- def init_weights(self):
- for m in self.modules():
- if isinstance(m, nn.Linear):
- trunc_normal_(m.weight, std=.02)
- if isinstance(m, nn.Linear) and m.bias is not None:
- nn.init.constant_(m.bias, 0)
- elif isinstance(m, nn.LayerNorm):
- nn.init.constant_(m.bias, 0)
- nn.init.constant_(m.weight, 1.0)
+ dpr = [
+ x.item()
+ for x in torch.linspace(0, self.drop_path_rate, self.depth)
+ ]
def forward(self, x):
# convert to list
diff --git a/easycv/models/classification/classification.py b/easycv/models/classification/classification.py
index ddcc9e31..34bde969 100644
--- a/easycv/models/classification/classification.py
+++ b/easycv/models/classification/classification.py
@@ -53,22 +53,15 @@ class Classification(BaseModel):
if 'mixUp' in train_preprocess:
rank, _ = get_dist_info()
np.random.seed(rank + 12)
- if not mixup_cfg:
- num_classes = head.get(
- 'num_classes',
- 1000) if 'num_classes' in head else backbone.get(
- 'num_classes', 1000)
- mixup_cfg = dict(
- mixup_alpha=0.8,
- cutmix_alpha=1.0,
- cutmix_minmax=None,
- prob=1.0,
- switch_prob=0.5,
- mode='batch',
- label_smoothing=0.1,
- num_classes=num_classes)
- self.mixup = Mixup(**mixup_cfg)
- head.loss_config = {'type': 'SoftTargetCrossEntropy'}
+ if mixup_cfg is not None:
+ if 'num_classes' in mixup_cfg:
+ self.mixup = Mixup(**mixup_cfg)
+ elif 'num_classes' in head or 'num_classes' in backbone:
+ num_classes = head.get(
+ 'num_classes'
+ ) if 'num_classes' in head else backbone.get('num_classes')
+ mixup_cfg['num_classes'] = num_classes
+ self.mixup = Mixup(**mixup_cfg)
train_preprocess.remove('mixUp')
self.train_preprocess = [
self.preprocess_key_map[i] for i in train_preprocess
@@ -173,7 +166,10 @@ class Classification(BaseModel):
for preprocess in self.train_preprocess:
img = preprocess(img)
- if hasattr(self, 'mixup'):
+ # When the number of samples in the dataset is odd, the last batch size of each epoch will be odd,
+ # which will cause mixup to report an error. To avoid this situation, mixup is applied only when
+ # the batch size is even.
+ if hasattr(self, 'mixup') and len(img) % 2 == 0:
img, gt_labels = self.mixup(img, gt_labels)
x = self.forward_backbone(img)
diff --git a/easycv/models/heads/cls_head.py b/easycv/models/heads/cls_head.py
index 4b4654d6..ff3724d7 100644
--- a/easycv/models/heads/cls_head.py
+++ b/easycv/models/heads/cls_head.py
@@ -28,7 +28,8 @@ class ClsHead(nn.Module):
},
input_feature_index=[0],
init_cfg=dict(
- type='Normal', layer='Linear', std=0.01, bias=0.)):
+ type='Normal', layer='Linear', std=0.01, bias=0.),
+ use_num_classes=True):
super(ClsHead, self).__init__()
self.with_avg_pool = with_avg_pool
@@ -46,7 +47,8 @@ class ClsHead(nn.Module):
'label_smooth must be given as a float number in [0,1]'
logger.info(f'=> Augment: using label smooth={self.label_smooth}')
loss_config['label_smooth'] = label_smooth
- loss_config['num_classes'] = num_classes
+ if use_num_classes:
+ loss_config['num_classes'] = num_classes
self.criterion = build_from_cfg(loss_config, LOSSES)
diff --git a/easycv/models/loss/cross_entropy_loss.py b/easycv/models/loss/cross_entropy_loss.py
index 69e1f615..0f9d5074 100644
--- a/easycv/models/loss/cross_entropy_loss.py
+++ b/easycv/models/loss/cross_entropy_loss.py
@@ -115,6 +115,7 @@ def binary_cross_entropy(pred,
class_weight=None,
ignore_index=-100,
avg_non_ignore=False,
+ label_ceil=False,
**kwargs):
"""Calculate the binary CrossEntropy loss.
@@ -132,11 +133,14 @@ def binary_cross_entropy(pred,
avg_non_ignore (bool): The flag decides to whether the loss is
only averaged over non-ignored targets. Default: False.
`New in version 0.23.0.`
+        label_ceil (bool): When using BCE with label_ceil=True, label elements
+            in (0, 1] are rounded up to 1.
+            Default: False.
Returns:
torch.Tensor: The calculated loss
"""
- if len(pred.shape) > 1 and pred.shape(1) == 1:
+ if len(pred.shape) > 1 and pred.shape[1] == 1:
# For binary class segmentation, the shape of pred is
# [N, 1, H, W] and that of label is [N, H, W].
# As the ignore_index often set as 255, so the
@@ -162,6 +166,8 @@ def binary_cross_entropy(pred,
weight = weight * valid_mask
else:
weight = valid_mask
+ if label_ceil:
+ label = label.gt(0.0).type(label.dtype)
# average loss over non-ignored and valid elements
if reduction == 'mean' and avg_factor is None and avg_non_ignore:
avg_factor = valid_mask.sum().item()
@@ -234,6 +240,9 @@ class CrossEntropyLoss(nn.Module):
avg_non_ignore (bool): The flag decides to whether the loss is
only averaged over non-ignored targets. Default: False.
`New in version 0.23.0.`
+        label_ceil (bool): When using BCE with label_ceil=True, label elements
+            in (0, 1] are rounded up to 1.
+            Default: False.
"""
def __init__(self,
@@ -243,10 +252,16 @@ class CrossEntropyLoss(nn.Module):
class_weight=None,
loss_weight=1.0,
loss_name='loss_ce',
- avg_non_ignore=False):
+ avg_non_ignore=False,
+ label_ceil=False):
super(CrossEntropyLoss, self).__init__()
assert (use_sigmoid is False) or (use_mask is False)
self.use_sigmoid = use_sigmoid
+ if label_ceil:
+ if not use_sigmoid:
+ raise ValueError(
+                    "'label_ceil' is supported only when 'use_sigmoid' is True. If BCE is not used, please set label_ceil=False."
+ )
self.use_mask = use_mask
self.reduction = reduction
self.loss_weight = loss_weight
@@ -266,6 +281,7 @@ class CrossEntropyLoss(nn.Module):
else:
self.cls_criterion = cross_entropy
self._loss_name = loss_name
+ self.label_ceil = label_ceil
def extra_repr(self):
"""Extra repr."""
@@ -289,16 +305,29 @@ class CrossEntropyLoss(nn.Module):
else:
class_weight = None
# Note: for BCE loss, label < 0 is invalid.
- loss_cls = self.loss_weight * self.cls_criterion(
- cls_score,
- label,
- weight,
- class_weight=class_weight,
- reduction=reduction,
- avg_factor=avg_factor,
- avg_non_ignore=self.avg_non_ignore,
- ignore_index=ignore_index,
- **kwargs)
+ if self.use_sigmoid:
+ loss_cls = self.loss_weight * self.cls_criterion(
+ cls_score,
+ label,
+ weight,
+ class_weight=class_weight,
+ reduction=reduction,
+ avg_factor=avg_factor,
+ avg_non_ignore=self.avg_non_ignore,
+ ignore_index=ignore_index,
+ label_ceil=self.label_ceil,
+ **kwargs)
+ else:
+ loss_cls = self.loss_weight * self.cls_criterion(
+ cls_score,
+ label,
+ weight,
+ class_weight=class_weight,
+ reduction=reduction,
+ avg_factor=avg_factor,
+ avg_non_ignore=self.avg_non_ignore,
+ ignore_index=ignore_index,
+ **kwargs)
return loss_cls
@property
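Note: a minimal sketch of the new label_ceil option: with use_sigmoid=True (BCE)
and soft mixup-style targets in (0, 1], non-zero targets are rounded up to 1
before the loss is computed (the tensors are only for illustration):

    import torch
    from easycv.models.loss.cross_entropy_loss import CrossEntropyLoss

    criterion = CrossEntropyLoss(use_sigmoid=True, loss_weight=1.0, label_ceil=True)
    cls_score = torch.randn(4, 1000)    # logits
    label = torch.zeros(4, 1000)
    label[:, 0] = 0.8                   # soft (mixup-style) targets
    loss = criterion(cls_score, label)  # targets > 0 are treated as 1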
diff --git a/tests/models/backbones/test_deitiii.py b/tests/models/backbones/test_deitiii.py
new file mode 100644
index 00000000..f95f6ea5
--- /dev/null
+++ b/tests/models/backbones/test_deitiii.py
@@ -0,0 +1,42 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import unittest
+
+import numpy as np
+import torch
+from numpy.testing import assert_array_almost_equal
+
+
+class DeiTIIITest(unittest.TestCase):
+
+ def setUp(self):
+ print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
+
+ @unittest.skip('skip DeiT III unittest')
+ def test_deitiii(self):
+ model_path = 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/classification/deitiii/epoch_800.pth'
+ config_path = 'configs/classification/imagenet/vit/imagenet_deitiii_large_patch16_192_jpg.py'
+ img = 'https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/demo/deitiii_demo.JPEG'
+ # deitiii = ClsPredictor(model_path, config_path)
+ deitiii = []
+ output = deitiii.predict(img)
+
+ self.assertIn('prob', output)
+ self.assertIn('class', output)
+ self.assertEqual(len(output['prob'][0]), 1000)
+
+ assert_array_almost_equal(
+ output['prob'][0][:10],
+ torch.Tensor([
+ 2.04629918698628899e-06, 5.27398606209317222e-06,
+ 5.52915162188583054e-06, 3.60625563189387321e-06,
+ 3.29447357216849923e-06, 5.61309570912271738e-06,
+ 8.93703327164985240e-06, 4.89157764604897238e-06,
+ 4.39371024185675196e-06, 5.21611764270346612e-06
+ ]),
+ decimal=8)
+
+ self.assertEqual(int(output['class']), 948)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tools/train.py b/tools/train.py
index 2241e760..96f93db8 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -273,8 +273,9 @@ def main():
drop_last=getattr(cfg.data, 'drop_last', False),
reuse_worker_cache=cfg.data.get('reuse_worker_cache', False),
persistent_workers=cfg.data.get('persistent_workers', False),
- collate_hooks=cfg.data.get('train_collate_hooks', []))
- for ds in datasets
+ collate_hooks=cfg.data.get('train_collate_hooks', []),
+ use_repeated_augment_sampler=cfg.data.get(
+ 'use_repeated_augment_sampler', False)) for ds in datasets
]
else:
default_args = dict(
From 9f01a37ad4df57b30430c41df08459025174e8fd Mon Sep 17 00:00:00 2001
From: tuofeilun <38110862+tuofeilunhifi@users.noreply.github.com>
Date: Fri, 16 Sep 2022 11:03:53 +0800
Subject: [PATCH 4/9] Refactor ViTDet backbone and simple feature pyramid
(#177)
1. The ViTDet backbone implemented following detectron2 (d2) is about 20% faster than the ViTDet backbone originally reproduced in EasyCV.
2. 50.57 -> 50.65
---
.../detection/vitdet/lsj_coco_detection.py | 6 +-
configs/detection/vitdet/lsj_coco_instance.py | 6 +-
.../vitdet/vitdet_basicblock_100e.py | 3 -
.../vitdet/vitdet_bottleneck_100e.py | 3 -
.../vitdet/vitdet_cascade_mask_rcnn.py | 231 ++++
.../vitdet/vitdet_cascade_mask_rcnn_100e.py | 4 +
.../detection/vitdet/vitdet_faster_rcnn.py | 31 +-
.../vitdet/vitdet_faster_rcnn_100e.py | 2 +-
configs/detection/vitdet/vitdet_mask_rcnn.py | 31 +-
...itdet_100e.py => vitdet_mask_rcnn_100e.py} | 0
.../detection/vitdet/vitdet_schedule_100e.py | 21 +-
docs/source/_static/result.jpg | 4 +-
docs/source/model_zoo_det.md | 2 +-
.../layer_decay_optimizer_constructor.py | 78 +-
easycv/models/backbones/vitdet.py | 1057 ++++++-----------
easycv/models/detection/necks/fpn.py | 3 -
easycv/models/detection/necks/sfp.py | 216 +---
easycv/predictors/detector.py | 10 +-
tests/models/backbones/test_vitdet.py | 23 +-
tests/predictors/test_detector.py | 189 ++-
20 files changed, 925 insertions(+), 995 deletions(-)
delete mode 100644 configs/detection/vitdet/vitdet_basicblock_100e.py
delete mode 100644 configs/detection/vitdet/vitdet_bottleneck_100e.py
create mode 100644 configs/detection/vitdet/vitdet_cascade_mask_rcnn.py
create mode 100644 configs/detection/vitdet/vitdet_cascade_mask_rcnn_100e.py
rename configs/detection/vitdet/{vitdet_100e.py => vitdet_mask_rcnn_100e.py} (100%)
diff --git a/configs/detection/vitdet/lsj_coco_detection.py b/configs/detection/vitdet/lsj_coco_detection.py
index f5da1064..fb243a23 100644
--- a/configs/detection/vitdet/lsj_coco_detection.py
+++ b/configs/detection/vitdet/lsj_coco_detection.py
@@ -101,13 +101,15 @@ val_dataset = dict(
pipeline=test_pipeline)
data = dict(
- imgs_per_gpu=1, workers_per_gpu=2, train=train_dataset, val=val_dataset)
+ imgs_per_gpu=4, workers_per_gpu=2, train=train_dataset, val=val_dataset
+) # 64(total batch size) = 4 (batch size/per gpu) x 8 (gpu num) x 2(node)
# evaluation
-eval_config = dict(interval=1, gpu_collect=False)
+eval_config = dict(initial=False, interval=1, gpu_collect=False)
eval_pipelines = [
dict(
mode='test',
+ # dist_eval=True,
evaluators=[
dict(type='CocoDetectionEvaluator', classes=CLASSES),
],
diff --git a/configs/detection/vitdet/lsj_coco_instance.py b/configs/detection/vitdet/lsj_coco_instance.py
index a42aa040..5271363f 100644
--- a/configs/detection/vitdet/lsj_coco_instance.py
+++ b/configs/detection/vitdet/lsj_coco_instance.py
@@ -101,13 +101,15 @@ val_dataset = dict(
pipeline=test_pipeline)
data = dict(
- imgs_per_gpu=1, workers_per_gpu=2, train=train_dataset, val=val_dataset)
+ imgs_per_gpu=4, workers_per_gpu=2, train=train_dataset, val=val_dataset
+) # 64(total batch size) = 4 (batch size/per gpu) x 8 (gpu num) x 2(node)
# evaluation
-eval_config = dict(interval=1, gpu_collect=False)
+eval_config = dict(initial=False, interval=1, gpu_collect=False)
eval_pipelines = [
dict(
mode='test',
+ # dist_eval=True,
evaluators=[
dict(type='CocoDetectionEvaluator', classes=CLASSES),
dict(type='CocoMaskEvaluator', classes=CLASSES)
diff --git a/configs/detection/vitdet/vitdet_basicblock_100e.py b/configs/detection/vitdet/vitdet_basicblock_100e.py
deleted file mode 100644
index a3ea54e7..00000000
--- a/configs/detection/vitdet/vitdet_basicblock_100e.py
+++ /dev/null
@@ -1,3 +0,0 @@
-_base_ = './vitdet_100e.py'
-
-model = dict(backbone=dict(aggregation='basicblock'))
diff --git a/configs/detection/vitdet/vitdet_bottleneck_100e.py b/configs/detection/vitdet/vitdet_bottleneck_100e.py
deleted file mode 100644
index a6031797..00000000
--- a/configs/detection/vitdet/vitdet_bottleneck_100e.py
+++ /dev/null
@@ -1,3 +0,0 @@
-_base_ = './vitdet_100e.py'
-
-model = dict(backbone=dict(aggregation='bottleneck'))
diff --git a/configs/detection/vitdet/vitdet_cascade_mask_rcnn.py b/configs/detection/vitdet/vitdet_cascade_mask_rcnn.py
new file mode 100644
index 00000000..dfe0d68d
--- /dev/null
+++ b/configs/detection/vitdet/vitdet_cascade_mask_rcnn.py
@@ -0,0 +1,231 @@
+# model settings
+
+norm_cfg = dict(type='GN', num_groups=1, eps=1e-6, requires_grad=True)
+
+pretrained = 'https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-b-1600/warpper_mae_vit-base-p16-1600e.pth'
+model = dict(
+ type='CascadeRCNN',
+ pretrained=pretrained,
+ backbone=dict(
+ type='ViTDet',
+ img_size=1024,
+ patch_size=16,
+ embed_dim=768,
+ depth=12,
+ num_heads=12,
+ drop_path_rate=0.1,
+ window_size=14,
+ mlp_ratio=4,
+ qkv_bias=True,
+ window_block_indexes=[
+            # 2, 5, 8, 11 for global attention
+ 0,
+ 1,
+ 3,
+ 4,
+ 6,
+ 7,
+ 9,
+ 10,
+ ],
+ residual_block_indexes=[],
+ use_rel_pos=True),
+ neck=dict(
+ type='SFP',
+ in_channels=768,
+ out_channels=256,
+ scale_factors=(4.0, 2.0, 1.0, 0.5),
+ norm_cfg=norm_cfg,
+ num_outs=5),
+ rpn_head=dict(
+ type='RPNHead',
+ in_channels=256,
+ feat_channels=256,
+ num_convs=2,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ scales=[8],
+ ratios=[0.5, 1.0, 2.0],
+ strides=[4, 8, 16, 32, 64]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
+ roi_head=dict(
+ type='CascadeRoIHead',
+ num_stages=3,
+ stage_loss_weights=[1, 0.5, 0.25],
+ bbox_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32]),
+ bbox_head=[
+ dict(
+ type='Shared4Conv1FCBBoxHead',
+ conv_out_channels=256,
+ norm_cfg=norm_cfg,
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=80,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ reg_class_agnostic=True,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
+ loss_weight=1.0)),
+ dict(
+ type='Shared4Conv1FCBBoxHead',
+ conv_out_channels=256,
+ norm_cfg=norm_cfg,
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=80,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.05, 0.05, 0.1, 0.1]),
+ reg_class_agnostic=True,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
+ loss_weight=1.0)),
+ dict(
+ type='Shared4Conv1FCBBoxHead',
+ conv_out_channels=256,
+ norm_cfg=norm_cfg,
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=80,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.033, 0.033, 0.067, 0.067]),
+ reg_class_agnostic=True,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
+ ],
+ mask_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32]),
+ mask_head=dict(
+ type='FCNMaskHead',
+ norm_cfg=norm_cfg,
+ num_convs=4,
+ in_channels=256,
+ conv_out_channels=256,
+ num_classes=80,
+ loss_mask=dict(
+ type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
+ # model training and testing settings
+ train_cfg=dict(
+ rpn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.3,
+ min_pos_iou=0.3,
+ match_low_quality=True,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=256,
+ pos_fraction=0.5,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=False),
+ allowed_border=0,
+ pos_weight=-1,
+ debug=False),
+ rpn_proposal=dict(
+ nms_pre=2000,
+ max_per_img=2000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=[
+ dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.5,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ mask_size=28,
+ pos_weight=-1,
+ debug=False),
+ dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.6,
+ neg_iou_thr=0.6,
+ min_pos_iou=0.6,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ mask_size=28,
+ pos_weight=-1,
+ debug=False),
+ dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.7,
+ min_pos_iou=0.7,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ mask_size=28,
+ pos_weight=-1,
+ debug=False)
+ ]),
+ test_cfg=dict(
+ rpn=dict(
+ nms_pre=1000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100,
+ mask_thr_binary=0.5)))
+
+mmlab_modules = [
+ dict(type='mmdet', name='CascadeRCNN', module='model'),
+ dict(type='mmdet', name='RPNHead', module='head'),
+ dict(type='mmdet', name='CascadeRoIHead', module='head'),
+]
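For reference, a minimal sketch of the cascade schedule encoded in this new config: each refinement stage re-assigns proposals with a stricter IoU threshold and a smaller loss weight. It assumes mmcv is installed and the repository root is the working directory:

    from mmcv import Config

    cfg = Config.fromfile('configs/detection/vitdet/vitdet_cascade_mask_rcnn.py')
    for i, stage_cfg in enumerate(cfg.model.train_cfg.rcnn):
        print('stage %d: pos_iou_thr=%.1f, loss_weight=%.2f' %
              (i, stage_cfg.assigner.pos_iou_thr,
               cfg.model.roi_head.stage_loss_weights[i]))
    # stage 0: pos_iou_thr=0.5, loss_weight=1.00
    # stage 1: pos_iou_thr=0.6, loss_weight=0.50
    # stage 2: pos_iou_thr=0.7, loss_weight=0.25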
diff --git a/configs/detection/vitdet/vitdet_cascade_mask_rcnn_100e.py b/configs/detection/vitdet/vitdet_cascade_mask_rcnn_100e.py
new file mode 100644
index 00000000..bbbc339f
--- /dev/null
+++ b/configs/detection/vitdet/vitdet_cascade_mask_rcnn_100e.py
@@ -0,0 +1,4 @@
+_base_ = [
+ './vitdet_cascade_mask_rcnn.py', './lsj_coco_instance.py',
+ './vitdet_schedule_100e.py'
+]
diff --git a/configs/detection/vitdet/vitdet_faster_rcnn.py b/configs/detection/vitdet/vitdet_faster_rcnn.py
index 48604d8b..0a00b397 100644
--- a/configs/detection/vitdet/vitdet_faster_rcnn.py
+++ b/configs/detection/vitdet/vitdet_faster_rcnn.py
@@ -1,6 +1,6 @@
# model settings
-norm_cfg = dict(type='GN', num_groups=1, requires_grad=True)
+norm_cfg = dict(type='GN', num_groups=1, eps=1e-6, requires_grad=True)
pretrained = 'https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-b-1600/warpper_mae_vit-base-p16-1600e.pth'
model = dict(
@@ -9,22 +9,32 @@ model = dict(
backbone=dict(
type='ViTDet',
img_size=1024,
+ patch_size=16,
embed_dim=768,
depth=12,
num_heads=12,
+ drop_path_rate=0.1,
+ window_size=14,
mlp_ratio=4,
qkv_bias=True,
- qk_scale=None,
- drop_rate=0.,
- attn_drop_rate=0.,
- drop_path_rate=0.1,
- use_abs_pos_emb=True,
- aggregation='attn',
- ),
+ window_block_indexes=[
+            # 2, 5, 8, 11 for global attention
+ 0,
+ 1,
+ 3,
+ 4,
+ 6,
+ 7,
+ 9,
+ 10,
+ ],
+ residual_block_indexes=[],
+ use_rel_pos=True),
neck=dict(
type='SFP',
- in_channels=[768, 768, 768, 768],
+ in_channels=768,
out_channels=256,
+ scale_factors=(4.0, 2.0, 1.0, 0.5),
norm_cfg=norm_cfg,
num_outs=5),
rpn_head=dict(
@@ -32,7 +42,6 @@ model = dict(
in_channels=256,
feat_channels=256,
num_convs=2,
- norm_cfg=norm_cfg,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
@@ -98,7 +107,7 @@ model = dict(
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
- match_low_quality=True,
+ match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
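With this change the neck receives a single stride-16 map (in_channels=768) and builds the pyramid itself from scale_factors, matching the SFP refactor later in this patch. A shape-only sketch of what each stage produces for a 1024x1024 input (assumes torch, mmcv and an installed easycv are available):

    import torch
    from easycv.models.detection.necks.sfp import SFP

    neck = SFP(
        in_channels=768,
        out_channels=256,
        scale_factors=(4.0, 2.0, 1.0, 0.5),
        num_outs=5,
        norm_cfg=dict(type='GN', num_groups=1, eps=1e-6, requires_grad=True))
    x = torch.randn(1, 768, 64, 64)  # ViTDet output: 1024 / patch_size 16 = 64
    for scale, stage in zip(neck.scale_factors, neck.stages):
        print(scale, tuple(stage(x).shape))
    # 4.0 -> (1, 256, 256, 256)  stride 4
    # 2.0 -> (1, 256, 128, 128)  stride 8
    # 1.0 -> (1, 256, 64, 64)    stride 16
    # 0.5 -> (1, 256, 32, 32)    stride 32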
diff --git a/configs/detection/vitdet/vitdet_faster_rcnn_100e.py b/configs/detection/vitdet/vitdet_faster_rcnn_100e.py
index 5a43b575..bfeab9d1 100644
--- a/configs/detection/vitdet/vitdet_faster_rcnn_100e.py
+++ b/configs/detection/vitdet/vitdet_faster_rcnn_100e.py
@@ -1,4 +1,4 @@
_base_ = [
- './vitdet_faster_rcnn.py', './lsj_coco_detection.py',
+ './vitdet_faster_rcnn.py', './lsj_coco_instance.py',
'./vitdet_schedule_100e.py'
]
diff --git a/configs/detection/vitdet/vitdet_mask_rcnn.py b/configs/detection/vitdet/vitdet_mask_rcnn.py
index 890f6e8f..6b1ed1ce 100644
--- a/configs/detection/vitdet/vitdet_mask_rcnn.py
+++ b/configs/detection/vitdet/vitdet_mask_rcnn.py
@@ -1,6 +1,6 @@
# model settings
-norm_cfg = dict(type='GN', num_groups=1, requires_grad=True)
+norm_cfg = dict(type='GN', num_groups=1, eps=1e-6, requires_grad=True)
pretrained = 'https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-b-1600/warpper_mae_vit-base-p16-1600e.pth'
model = dict(
@@ -9,22 +9,32 @@ model = dict(
backbone=dict(
type='ViTDet',
img_size=1024,
+ patch_size=16,
embed_dim=768,
depth=12,
num_heads=12,
+ drop_path_rate=0.1,
+ window_size=14,
mlp_ratio=4,
qkv_bias=True,
- qk_scale=None,
- drop_rate=0.,
- attn_drop_rate=0.,
- drop_path_rate=0.1,
- use_abs_pos_emb=True,
- aggregation='attn',
- ),
+ window_block_indexes=[
+            # 2, 5, 8, 11 for global attention
+ 0,
+ 1,
+ 3,
+ 4,
+ 6,
+ 7,
+ 9,
+ 10,
+ ],
+ residual_block_indexes=[],
+ use_rel_pos=True),
neck=dict(
type='SFP',
- in_channels=[768, 768, 768, 768],
+ in_channels=768,
out_channels=256,
+ scale_factors=(4.0, 2.0, 1.0, 0.5),
norm_cfg=norm_cfg,
num_outs=5),
rpn_head=dict(
@@ -32,7 +42,6 @@ model = dict(
in_channels=256,
feat_channels=256,
num_convs=2,
- norm_cfg=norm_cfg,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
@@ -112,7 +121,7 @@ model = dict(
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
- match_low_quality=True,
+ match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
diff --git a/configs/detection/vitdet/vitdet_100e.py b/configs/detection/vitdet/vitdet_mask_rcnn_100e.py
similarity index 100%
rename from configs/detection/vitdet/vitdet_100e.py
rename to configs/detection/vitdet/vitdet_mask_rcnn_100e.py
diff --git a/configs/detection/vitdet/vitdet_schedule_100e.py b/configs/detection/vitdet/vitdet_schedule_100e.py
index e659b1f6..a9160eba 100644
--- a/configs/detection/vitdet/vitdet_schedule_100e.py
+++ b/configs/detection/vitdet/vitdet_schedule_100e.py
@@ -1,26 +1,29 @@
_base_ = 'configs/base.py'
+log_config = dict(
+ interval=200,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+
checkpoint_config = dict(interval=10)
+
# optimizer
-paramwise_options = {
- 'norm': dict(weight_decay=0.),
- 'bias': dict(weight_decay=0.),
- 'pos_embed': dict(weight_decay=0.),
- 'cls_token': dict(weight_decay=0.)
-}
optimizer = dict(
type='AdamW',
lr=1e-4,
betas=(0.9, 0.999),
weight_decay=0.1,
- paramwise_options=paramwise_options)
-optimizer_config = dict(grad_clip=None, loss_scale=512.)
+ constructor='LayerDecayOptimizerConstructor',
+ paramwise_options=dict(num_layers=12, layer_decay_rate=0.7))
+optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=250,
- warmup_ratio=0.067,
+ warmup_ratio=0.001,
step=[88, 96])
total_epochs = 100
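The optimizer now delegates per-parameter lr scaling to LayerDecayOptimizerConstructor (see the change to layer_decay_optimizer_constructor.py below). For the epoch-level lr itself, a minimal sketch of the step policy above; gamma=0.1 is mmcv's default for the 'step' policy and is an assumption here, since this config does not set it:

    base_lr, gamma, steps = 1e-4, 0.1, [88, 96]

    def lr_at_epoch(epoch):
        # each milestone already passed multiplies the lr by gamma once
        return base_lr * gamma ** sum(epoch >= s for s in steps)

    print(lr_at_epoch(0), lr_at_epoch(90), lr_at_epoch(99))
    # -> 1e-04, 1e-05, 1e-06 (up to float rounding); warmup covers the first 250 iters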
diff --git a/docs/source/_static/result.jpg b/docs/source/_static/result.jpg
index 5bb73d81..d63bad1d 100644
--- a/docs/source/_static/result.jpg
+++ b/docs/source/_static/result.jpg
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:ee64c0caef841c61c7e6344b7fe2c07a38fba07a8de81ff38c0686c641e0a283
-size 190356
+oid sha256:c696a58a2963b5ac47317751f04ff45bfed4723f2f70bacf91eac711f9710e54
+size 189432
diff --git a/docs/source/model_zoo_det.md b/docs/source/model_zoo_det.md
index 03eb3588..474496f0 100644
--- a/docs/source/model_zoo_det.md
+++ b/docs/source/model_zoo_det.md
@@ -22,7 +22,7 @@ Pretrained on COCO2017 dataset. (The result has been optimized with PAI-Blade, a
 | Algorithm | Config | Params (backbone/total) | inference time(V100) (ms/img) | bbox_mAP val 0.5:0.95 | mask_mAP val 0.5:0.95 | Download |
| ---------- | ------------------------------------------------------------ | ------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
-| ViTDet_MaskRCNN | [vitdet_maskrcnn](https://github.com/alibaba/EasyCV/tree/master/configs/detection/vitdet/vitdet_100e.py) | 88M/118M | 163ms | 50.57 | 44.96 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/vitdet/vit_base/vitdet_maskrcnn.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/vitdet/vit_base/vitdet_maskrcnn.log.json) |
+| ViTDet_MaskRCNN | [vitdet_maskrcnn](https://github.com/alibaba/EasyCV/tree/master/configs/detection/vitdet/vitdet_mask_rcnn_100e.py) | 86M/111M | 138ms | 50.65 | 45.41 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/vitdet/vit_base/epoch_100.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/vitdet/vit_base/20220901_135827.log.json) |
## FCOS
diff --git a/easycv/core/optimizer/layer_decay_optimizer_constructor.py b/easycv/core/optimizer/layer_decay_optimizer_constructor.py
index 45625494..310bb38c 100644
--- a/easycv/core/optimizer/layer_decay_optimizer_constructor.py
+++ b/easycv/core/optimizer/layer_decay_optimizer_constructor.py
@@ -1,5 +1,3 @@
-# Reference from https://github.com/ViTAE-Transformer/ViTDet/blob/main/mmcv_custom/layer_decay_optimizer_constructor.py
-
import json
from mmcv.runner import DefaultOptimizerConstructor, get_dist_info
@@ -7,23 +5,32 @@ from mmcv.runner import DefaultOptimizerConstructor, get_dist_info
from .builder import OPTIMIZER_BUILDERS
-def get_num_layer_for_vit(var_name, num_max_layer, layer_sep=None):
- if var_name in ('backbone.cls_token', 'backbone.mask_token',
- 'backbone.pos_embed'):
- return 0
- elif var_name.startswith('backbone.patch_embed'):
- return 0
- elif var_name.startswith('backbone.blocks'):
- layer_id = int(var_name.split('.')[2])
- return layer_id + 1
- else:
- return num_max_layer - 1
+def get_vit_lr_decay_rate(name, lr_decay_rate=1.0, num_layers=12):
+ """
+ Calculate lr decay rate for different ViT blocks.
+ Reference from https://github.com/facebookresearch/detectron2/blob/main/detectron2/modeling/backbone/vit.py
+ Args:
+ name (string): parameter name.
+ lr_decay_rate (float): base lr decay rate.
+ num_layers (int): number of ViT blocks.
+ Returns:
+        the layer id and lr decay scale for the given parameter.
+ """
+ layer_id = num_layers + 1
+ if '.pos_embed' in name or '.patch_embed' in name:
+ layer_id = 0
+ elif '.blocks.' in name and '.residual.' not in name:
+ layer_id = int(name[name.find('.blocks.'):].split('.')[2]) + 1
+
+ scale = lr_decay_rate**(num_layers + 1 - layer_id)
+
+ return layer_id, scale
@OPTIMIZER_BUILDERS.register_module()
class LayerDecayOptimizerConstructor(DefaultOptimizerConstructor):
- def add_params(self, params, module, prefix='', is_dcn_module=None):
+ def add_params(self, params, module):
"""Add all parameters of module to the params list.
The parameters of the given module will be added to the list of param
groups, with specific rules defined by paramwise_cfg.
@@ -31,54 +38,41 @@ class LayerDecayOptimizerConstructor(DefaultOptimizerConstructor):
params (list[dict]): A list of param groups, it will be modified
in place.
module (nn.Module): The module to be added.
- prefix (str): The prefix of the module
- is_dcn_module (int|float|None): If the current module is a
- submodule of DCN, `is_dcn_module` will be passed to
- control conv_offset layer's learning rate. Defaults to None.
+
+ Reference from https://github.com/ViTAE-Transformer/ViTDet/blob/main/mmcv_custom/layer_decay_optimizer_constructor.py
+ Note: Currently, this optimizer constructor is built for ViTDet.
"""
- # get param-wise options
parameter_groups = {}
print(self.paramwise_cfg)
- num_layers = self.paramwise_cfg.get('num_layers') + 2
- layer_sep = self.paramwise_cfg.get('layer_sep', None)
- layer_decay_rate = self.paramwise_cfg.get('layer_decay_rate')
+ lr_decay_rate = self.paramwise_cfg.get('layer_decay_rate')
+ num_layers = self.paramwise_cfg.get('num_layers')
print('Build LayerDecayOptimizerConstructor %f - %d' %
- (layer_decay_rate, num_layers))
+ (lr_decay_rate, num_layers))
+ lr = self.base_lr
weight_decay = self.base_wd
- custom_keys = self.paramwise_cfg.get('custom_keys', {})
- # first sort with alphabet order and then sort with reversed len of str
- sorted_keys = sorted(custom_keys.keys())
-
for name, param in module.named_parameters():
if not param.requires_grad:
continue # frozen weights
- if len(param.shape) == 1 or name.endswith('.bias') or (
- 'pos_embed' in name) or ('cls_token'
- in name) or ('rel_pos_' in name):
+ if 'backbone' in name and ('.norm' in name or '.pos_embed' in name
+ or '.gn.' in name or '.ln.' in name):
group_name = 'no_decay'
this_weight_decay = 0.
else:
group_name = 'decay'
this_weight_decay = weight_decay
- layer_id = get_num_layer_for_vit(name, num_layers, layer_sep)
+ if name.startswith('backbone'):
+ layer_id, scale = get_vit_lr_decay_rate(
+ name, lr_decay_rate=lr_decay_rate, num_layers=num_layers)
+ else:
+ layer_id, scale = -1, 1
group_name = 'layer_%d_%s' % (layer_id, group_name)
- # if the parameter match one of the custom keys, ignore other rules
- this_lr_multi = 1.
- for key in sorted_keys:
- if key in f'{name}':
- lr_mult = custom_keys[key].get('lr_mult', 1.)
- this_lr_multi = lr_mult
- group_name = '%s_%s' % (group_name, key)
- break
-
if group_name not in parameter_groups:
- scale = layer_decay_rate**(num_layers - layer_id - 1)
parameter_groups[group_name] = {
'weight_decay': this_weight_decay,
@@ -86,7 +80,7 @@ class LayerDecayOptimizerConstructor(DefaultOptimizerConstructor):
'param_names': [],
'lr_scale': scale,
'group_name': group_name,
- 'lr': scale * self.base_lr * this_lr_multi,
+ 'lr': scale * lr,
}
parameter_groups[group_name]['params'].append(param)
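A worked sketch of the new decay rule with the values wired up in vitdet_schedule_100e.py (layer_decay_rate=0.7, num_layers=12): pos_embed/patch_embed land in layer 0, block i in layer i+1, and everything else keeps lr_scale 1.0 (add_params only calls the helper for names starting with 'backbone'; other parameters get scale 1.0 directly):

    from easycv.core.optimizer.layer_decay_optimizer_constructor import \
        get_vit_lr_decay_rate

    for name in ['backbone.pos_embed',
                 'backbone.blocks.0.attn.qkv.weight',
                 'backbone.blocks.11.mlp.fc2.weight',
                 'roi_head.bbox_head.fc_cls.weight']:
        layer_id, scale = get_vit_lr_decay_rate(
            name, lr_decay_rate=0.7, num_layers=12)
        print(name, layer_id, round(scale, 4))
    # backbone.pos_embed                 -> layer 0,  lr_scale 0.0097
    # backbone.blocks.0.attn.qkv.weight  -> layer 1,  lr_scale 0.0138
    # backbone.blocks.11.mlp.fc2.weight  -> layer 12, lr_scale 0.7
    # roi_head.bbox_head.fc_cls.weight   -> layer 13, lr_scale 1.0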
diff --git a/easycv/models/backbones/vitdet.py b/easycv/models/backbones/vitdet.py
index 83e11efa..9380f740 100644
--- a/easycv/models/backbones/vitdet.py
+++ b/easycv/models/backbones/vitdet.py
@@ -1,5 +1,3 @@
-# Copyright 2018-2023 OpenMMLab. All rights reserved.
-# Reference: https://github.com/ViTAE-Transformer/ViTDet/blob/main/mmdet/models/backbones/vit.py
import math
from functools import partial
@@ -7,793 +5,466 @@ import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.checkpoint as checkpoint
-from mmcv.cnn import build_norm_layer, constant_init, kaiming_init
-from mmcv.runner import get_dist_info
-from timm.models.layers import to_2tuple, trunc_normal_
-from torch.nn.modules.batchnorm import _BatchNorm
+from timm.models.layers import DropPath, trunc_normal_
-from easycv.models.utils import DropPath, Mlp
+from easycv.models.utils import Mlp
from easycv.utils.checkpoint import load_checkpoint
from easycv.utils.logger import get_root_logger
from ..registry import BACKBONES
-from ..utils import build_conv_layer
-
-
-class BasicBlock(nn.Module):
- expansion = 1
-
- def __init__(self,
- inplanes,
- planes,
- stride=1,
- dilation=1,
- conv_cfg=None,
- norm_cfg=dict(type='BN')):
- super(BasicBlock, self).__init__()
-
- self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1)
- self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2)
-
- self.conv1 = build_conv_layer(
- conv_cfg,
- inplanes,
- planes,
- 3,
- stride=stride,
- padding=dilation,
- dilation=dilation,
- bias=False)
- self.add_module(self.norm1_name, norm1)
- self.conv2 = build_conv_layer(
- conv_cfg, planes, planes, 3, padding=1, bias=False)
- self.add_module(self.norm2_name, norm2)
-
- self.relu = nn.ReLU(inplace=True)
- self.stride = stride
- self.dilation = dilation
-
- @property
- def norm1(self):
- return getattr(self, self.norm1_name)
-
- @property
- def norm2(self):
- return getattr(self, self.norm2_name)
-
- def forward(self, x, H, W):
- B, _, C = x.shape
- x = x.permute(0, 2, 1).reshape(B, -1, H, W)
- identity = x
-
- out = self.conv1(x)
- out = self.norm1(out)
- out = self.relu(out)
-
- out = self.conv2(out)
- out = self.norm2(out)
-
- out += identity
- out = self.relu(out)
- out = out.flatten(2).transpose(1, 2)
- return out
-
-
-class Bottleneck(nn.Module):
- expansion = 4
-
- def __init__(self,
- inplanes,
- planes,
- stride=1,
- dilation=1,
- conv_cfg=None,
- norm_cfg=dict(type='BN')):
- """Bottleneck block for ResNet.
- If style is "pytorch", the stride-two layer is the 3x3 conv layer,
- if it is "caffe", the stride-two layer is the first 1x1 conv layer.
- """
- super(Bottleneck, self).__init__()
-
- self.inplanes = inplanes
- self.planes = planes
- self.stride = stride
- self.dilation = dilation
- self.conv_cfg = conv_cfg
- self.norm_cfg = norm_cfg
-
- self.conv1_stride = 1
- self.conv2_stride = stride
-
- self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1)
- self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2)
- self.norm3_name, norm3 = build_norm_layer(
- norm_cfg, planes * self.expansion, postfix=3)
-
- self.conv1 = build_conv_layer(
- conv_cfg,
- inplanes,
- planes,
- kernel_size=1,
- stride=self.conv1_stride,
- bias=False)
- self.add_module(self.norm1_name, norm1)
- self.conv2 = build_conv_layer(
- conv_cfg,
- planes,
- planes,
- kernel_size=3,
- stride=self.conv2_stride,
- padding=dilation,
- dilation=dilation,
- bias=False)
- self.add_module(self.norm2_name, norm2)
- self.conv3 = build_conv_layer(
- conv_cfg,
- planes,
- planes * self.expansion,
- kernel_size=1,
- bias=False)
- self.add_module(self.norm3_name, norm3)
-
- self.relu = nn.ReLU(inplace=True)
-
- @property
- def norm1(self):
- return getattr(self, self.norm1_name)
-
- @property
- def norm2(self):
- return getattr(self, self.norm2_name)
-
- @property
- def norm3(self):
- return getattr(self, self.norm3_name)
-
- def forward(self, x, H, W):
- B, _, C = x.shape
- x = x.permute(0, 2, 1).reshape(B, -1, H, W)
- identity = x
-
- out = self.conv1(x)
- out = self.norm1(out)
- out = self.relu(out)
-
- out = self.conv2(out)
- out = self.norm2(out)
- out = self.relu(out)
-
- out = self.conv3(out)
- out = self.norm3(out)
-
- out += identity
- out = self.relu(out)
- out = out.flatten(2).transpose(1, 2)
- return out
-
-
-class Attention(nn.Module):
-
- def __init__(self,
- dim,
- num_heads=8,
- qkv_bias=False,
- qk_scale=None,
- attn_drop=0.,
- proj_drop=0.,
- window_size=None,
- attn_head_dim=None):
- super().__init__()
- self.num_heads = num_heads
- head_dim = dim // num_heads
- if attn_head_dim is not None:
- head_dim = attn_head_dim
- all_head_dim = head_dim * self.num_heads
- # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
- self.scale = qk_scale or head_dim**-0.5
-
- self.qkv = nn.Linear(dim, all_head_dim * 3, bias=qkv_bias)
- self.window_size = window_size
- q_size = window_size[0]
- kv_size = q_size
- rel_sp_dim = 2 * q_size - 1
- self.rel_pos_h = nn.Parameter(torch.zeros(rel_sp_dim, head_dim))
- self.rel_pos_w = nn.Parameter(torch.zeros(rel_sp_dim, head_dim))
-
- self.attn_drop = nn.Dropout(attn_drop)
- self.proj = nn.Linear(all_head_dim, dim)
- self.proj_drop = nn.Dropout(proj_drop)
-
- def forward(self, x, H, W, rel_pos_bias=None):
- B, N, C = x.shape
- # qkv_bias = None
- # if self.q_bias is not None:
- # qkv_bias = torch.cat((self.q_bias, torch.zeros_like(self.v_bias, requires_grad=False), self.v_bias))
- # qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
- qkv = self.qkv(x)
- qkv = qkv.reshape(B, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
- q, k, v = qkv[0], qkv[1], qkv[
- 2] # make torchscript happy (cannot use tensor as tuple)
-
- q = q * self.scale
- attn = (q @ k.transpose(-2, -1))
- attn = calc_rel_pos_spatial(attn, q, self.window_size,
- self.window_size, self.rel_pos_h,
- self.rel_pos_w)
- # if self.relative_position_bias_table is not None:
- # relative_position_bias = \
- # self.relative_position_bias_table[self.relative_position_index.view(-1)].view(
- # self.window_size[0] * self.window_size[1] + 1,
- # self.window_size[0] * self.window_size[1] + 1, -1) # Wh*Ww,Wh*Ww,nH
- # relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww
- # attn = attn + relative_position_bias.unsqueeze(0)
-
- # if rel_pos_bias is not None:
- # attn = attn + rel_pos_bias
-
- attn = attn.softmax(dim=-1)
- attn = self.attn_drop(attn)
-
- x = (attn @ v).transpose(1, 2).reshape(B, N, -1)
- x = self.proj(x)
- x = self.proj_drop(x)
- return x
def window_partition(x, window_size):
"""
+ Partition into non-overlapping windows with padding if needed.
Args:
- x: (B, H, W, C)
- window_size (int): window size
+ x (tensor): input tokens with [B, H, W, C].
+ window_size (int): window size.
Returns:
- windows: (num_windows*B, window_size, window_size, C)
+ windows: windows after partition with [B * num_windows, window_size, window_size, C].
+ (Hp, Wp): padded height and width before partition
"""
B, H, W, C = x.shape
- x = x.view(B, H // window_size, window_size, W // window_size, window_size,
- C)
+
+ pad_h = (window_size - H % window_size) % window_size
+ pad_w = (window_size - W % window_size) % window_size
+ if pad_h > 0 or pad_w > 0:
+ x = F.pad(x, (0, 0, 0, pad_w, 0, pad_h))
+ Hp, Wp = H + pad_h, W + pad_w
+
+ x = x.view(B, Hp // window_size, window_size, Wp // window_size,
+ window_size, C)
windows = x.permute(0, 1, 3, 2, 4,
5).contiguous().view(-1, window_size, window_size, C)
- return windows
+ return windows, (Hp, Wp)
-def window_reverse(windows, window_size, H, W):
+def window_unpartition(windows, window_size, pad_hw, hw):
"""
+ Window unpartition into original sequences and removing padding.
Args:
- windows: (num_windows*B, window_size, window_size, C)
- window_size (int): Window size
- H (int): Height of image
- W (int): Width of image
+        windows (tensor): input tokens with [B * num_windows, window_size, window_size, C].
+ window_size (int): window size.
+ pad_hw (Tuple): padded height and width (Hp, Wp).
+ hw (Tuple): original height and width (H, W) before padding.
Returns:
- x: (B, H, W, C)
+ x: unpartitioned sequences with [B, H, W, C].
"""
- B = int(windows.shape[0] / (H * W / window_size / window_size))
- x = windows.view(B, H // window_size, W // window_size, window_size,
+ Hp, Wp = pad_hw
+ H, W = hw
+ B = windows.shape[0] // (Hp * Wp // window_size // window_size)
+ x = windows.view(B, Hp // window_size, Wp // window_size, window_size,
window_size, -1)
- x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
+ x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, Hp, Wp, -1)
+
+ if Hp > H or Wp > W:
+ x = x[:, :H, :W, :].contiguous()
return x
-def calc_rel_pos_spatial(
- attn,
- q,
- q_shape,
- k_shape,
- rel_pos_h,
- rel_pos_w,
-):
+def get_rel_pos(q_size, k_size, rel_pos):
"""
- Spatial Relative Positional Embeddings.
+ Get relative positional embeddings according to the relative positions of
+ query and key sizes.
+ Args:
+ q_size (int): size of query q.
+ k_size (int): size of key k.
+ rel_pos (Tensor): relative position embeddings (L, C).
+ Returns:
+ Extracted positional embeddings according to relative positions.
"""
- sp_idx = 0
- q_h, q_w = q_shape
- k_h, k_w = k_shape
+ max_rel_dist = int(2 * max(q_size, k_size) - 1)
+ # Interpolate rel pos if needed.
+ if rel_pos.shape[0] != max_rel_dist:
+ # Interpolate rel pos.
+ rel_pos_resized = F.interpolate(
+ rel_pos.reshape(1, rel_pos.shape[0], -1).permute(0, 2, 1),
+ size=max_rel_dist,
+ mode='linear',
+ )
+ rel_pos_resized = rel_pos_resized.reshape(-1,
+ max_rel_dist).permute(1, 0)
+ else:
+ rel_pos_resized = rel_pos
- # Scale up rel pos if shapes for q and k are different.
- q_h_ratio = max(k_h / q_h, 1.0)
- k_h_ratio = max(q_h / k_h, 1.0)
- dist_h = (
- torch.arange(q_h)[:, None] * q_h_ratio -
- torch.arange(k_h)[None, :] * k_h_ratio)
- dist_h += (k_h - 1) * k_h_ratio
- q_w_ratio = max(k_w / q_w, 1.0)
- k_w_ratio = max(q_w / k_w, 1.0)
- dist_w = (
- torch.arange(q_w)[:, None] * q_w_ratio -
- torch.arange(k_w)[None, :] * k_w_ratio)
- dist_w += (k_w - 1) * k_w_ratio
+ # Scale the coords with short length if shapes for q and k are different.
+ q_coords = torch.arange(q_size)[:, None] * max(k_size / q_size, 1.0)
+ k_coords = torch.arange(k_size)[None, :] * max(q_size / k_size, 1.0)
+ relative_coords = (q_coords -
+ k_coords) + (k_size - 1) * max(q_size / k_size, 1.0)
- Rh = rel_pos_h[dist_h.long()]
- Rw = rel_pos_w[dist_w.long()]
+ return rel_pos_resized[relative_coords.long()]
- B, n_head, q_N, dim = q.shape
- r_q = q[:, :, sp_idx:].reshape(B, n_head, q_h, q_w, dim)
- rel_h = torch.einsum('byhwc,hkc->byhwk', r_q, Rh)
- rel_w = torch.einsum('byhwc,wkc->byhwk', r_q, Rw)
+def add_decomposed_rel_pos(attn, q, rel_pos_h, rel_pos_w, q_size, k_size):
+ """
+ Calculate decomposed Relative Positional Embeddings from :paper:`mvitv2`.
+ https://github.com/facebookresearch/mvit/blob/19786631e330df9f3622e5402b4a419a263a2c80/mvit/models/attention.py # noqa B950
+ Args:
+ attn (Tensor): attention map.
+ q (Tensor): query q in the attention layer with shape (B, q_h * q_w, C).
+ rel_pos_h (Tensor): relative position embeddings (Lh, C) for height axis.
+ rel_pos_w (Tensor): relative position embeddings (Lw, C) for width axis.
+ q_size (Tuple): spatial sequence size of query q with (q_h, q_w).
+ k_size (Tuple): spatial sequence size of key k with (k_h, k_w).
+ Returns:
+ attn (Tensor): attention map with added relative positional embeddings.
+ """
+ q_h, q_w = q_size
+ k_h, k_w = k_size
+ Rh = get_rel_pos(q_h, k_h, rel_pos_h)
+ Rw = get_rel_pos(q_w, k_w, rel_pos_w)
- attn[:, :, sp_idx:, sp_idx:] = (
- attn[:, :, sp_idx:, sp_idx:].view(B, -1, q_h, q_w, k_h, k_w) +
- rel_h[:, :, :, :, :, None] + rel_w[:, :, :, :, None, :]).view(
- B, -1, q_h * q_w, k_h * k_w)
+ B, _, dim = q.shape
+ r_q = q.reshape(B, q_h, q_w, dim)
+ rel_h = torch.einsum('bhwc,hkc->bhwk', r_q, Rh)
+ rel_w = torch.einsum('bhwc,wkc->bhwk', r_q, Rw)
+
+ attn = (attn.view(B, q_h, q_w, k_h, k_w) + rel_h[:, :, :, :, None] +
+ rel_w[:, :, :, None, :]).view(B, q_h * q_w, k_h * k_w)
return attn
-class WindowAttention(nn.Module):
- """ Window based multi-head self attention (W-MSA) module with relative position bias.
- It supports both of shifted and non-shifted window.
+def get_abs_pos(abs_pos, has_cls_token, hw):
+ """
+ Calculate absolute positional embeddings. If needed, resize embeddings and remove cls_token
+ dimension for the original embeddings.
Args:
- dim (int): Number of input channels.
- window_size (tuple[int]): The height and width of the window.
- num_heads (int): Number of attention heads.
- qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
- qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set
- attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0
- proj_drop (float, optional): Dropout ratio of output. Default: 0.0
+ abs_pos (Tensor): absolute positional embeddings with (1, num_position, C).
+ has_cls_token (bool): If true, has 1 embedding in abs_pos for cls token.
+ hw (Tuple): size of input image tokens.
+ Returns:
+ Absolute positional embeddings after processing with shape (1, H, W, C)
+ """
+ h, w = hw
+ if has_cls_token:
+ abs_pos = abs_pos[:, 1:]
+ xy_num = abs_pos.shape[1]
+ size = int(math.sqrt(xy_num))
+ assert size * size == xy_num
+
+ if size != h or size != w:
+ new_abs_pos = F.interpolate(
+ abs_pos.reshape(1, size, size, -1).permute(0, 3, 1, 2),
+ size=(h, w),
+ mode='bicubic',
+ align_corners=False,
+ )
+
+ return new_abs_pos.permute(0, 2, 3, 1)
+ else:
+ return abs_pos.reshape(1, h, w, -1)
+
+
+class PatchEmbed(nn.Module):
+ """
+ Image to Patch Embedding.
"""
def __init__(self,
- dim,
- window_size,
- num_heads,
- qkv_bias=True,
- qk_scale=None,
- attn_drop=0.,
- proj_drop=0.,
- attn_head_dim=None):
-
+ kernel_size=(16, 16),
+ stride=(16, 16),
+ padding=(0, 0),
+ in_chans=3,
+ embed_dim=768):
+ """
+ Args:
+ kernel_size (Tuple): kernel size of the projection layer.
+ stride (Tuple): stride of the projection layer.
+ padding (Tuple): padding size of the projection layer.
+ in_chans (int): Number of input image channels.
+            embed_dim (int): Patch embedding dimension.
+ """
+ super().__init__()
+
+ self.proj = nn.Conv2d(
+ in_chans,
+ embed_dim,
+ kernel_size=kernel_size,
+ stride=stride,
+ padding=padding)
+
+ def forward(self, x):
+ x = self.proj(x)
+ # B C H W -> B H W C
+ x = x.permute(0, 2, 3, 1)
+ return x
+
+
+class Attention(nn.Module):
+ """Multi-head Attention block with relative position embeddings."""
+
+ def __init__(
+ self,
+ dim,
+ num_heads=8,
+ qkv_bias=True,
+ use_rel_pos=False,
+ rel_pos_zero_init=True,
+ input_size=None,
+ ):
+ """
+ Args:
+ dim (int): Number of input channels.
+ num_heads (int): Number of attention heads.
+            qkv_bias (bool): If True, add a learnable bias to query, key, value.
+            use_rel_pos (bool): If True, add relative positional embeddings to the attention map.
+ rel_pos_zero_init (bool): If True, zero initialize relative positional parameters.
+ input_size (int or None): Input resolution for calculating the relative positional
+ parameter size.
+ """
super().__init__()
- self.dim = dim
- self.window_size = window_size # Wh, Ww
self.num_heads = num_heads
head_dim = dim // num_heads
- self.scale = qk_scale or head_dim**-0.5
-
- q_size = window_size[0]
- kv_size = window_size[1]
- rel_sp_dim = 2 * q_size - 1
- self.rel_pos_h = nn.Parameter(torch.zeros(rel_sp_dim, head_dim))
- self.rel_pos_w = nn.Parameter(torch.zeros(rel_sp_dim, head_dim))
+ self.scale = head_dim**-0.5
self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
- self.attn_drop = nn.Dropout(attn_drop)
self.proj = nn.Linear(dim, dim)
- self.proj_drop = nn.Dropout(proj_drop)
- # trunc_normal_(self.relative_position_bias_table, std=.02)
- self.softmax = nn.Softmax(dim=-1)
+ self.use_rel_pos = use_rel_pos
+ if self.use_rel_pos:
+ # initialize relative positional embeddings
+ self.rel_pos_h = nn.Parameter(
+ torch.zeros(2 * input_size[0] - 1, head_dim))
+ self.rel_pos_w = nn.Parameter(
+ torch.zeros(2 * input_size[1] - 1, head_dim))
- def forward(self, x, H, W):
- """ Forward function.
- Args:
- x: input features with shape of (num_windows*B, N, C)
- mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None
- """
- B_, N, C = x.shape
- x = x.reshape(B_, H, W, C)
- pad_l = pad_t = 0
- pad_r = (self.window_size[1] -
- W % self.window_size[1]) % self.window_size[1]
- pad_b = (self.window_size[0] -
- H % self.window_size[0]) % self.window_size[0]
+ if not rel_pos_zero_init:
+ trunc_normal_(self.rel_pos_h, std=0.02)
+ trunc_normal_(self.rel_pos_w, std=0.02)
- x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b))
- _, Hp, Wp, _ = x.shape
+ def forward(self, x):
+ B, H, W, _ = x.shape
+ # qkv with shape (3, B, nHead, H * W, C)
+ qkv = self.qkv(x).reshape(B, H * W, 3, self.num_heads,
+ -1).permute(2, 0, 3, 1, 4)
+ # q, k, v with shape (B * nHead, H * W, C)
+ q, k, v = qkv.reshape(3, B * self.num_heads, H * W, -1).unbind(0)
- x = window_partition(
- x, self.window_size[0]) # nW*B, window_size, window_size, C
- x = x.view(-1, self.window_size[1] * self.window_size[0],
- C) # nW*B, window_size*window_size, C
- B_w = x.shape[0]
- N_w = x.shape[1]
- qkv = self.qkv(x).reshape(B_w, N_w, 3, self.num_heads,
- C // self.num_heads).permute(2, 0, 3, 1, 4)
- q, k, v = qkv[0], qkv[1], qkv[
- 2] # make torchscript happy (cannot use tensor as tuple)
+ attn = (q * self.scale) @ k.transpose(-2, -1)
- q = q * self.scale
- attn = (q @ k.transpose(-2, -1))
+ if self.use_rel_pos:
+ attn = add_decomposed_rel_pos(attn, q, self.rel_pos_h,
+ self.rel_pos_w, (H, W), (H, W))
- attn = calc_rel_pos_spatial(attn, q, self.window_size,
- self.window_size, self.rel_pos_h,
- self.rel_pos_w)
-
- attn = self.softmax(attn)
-
- attn = self.attn_drop(attn)
-
- x = (attn @ v).transpose(1, 2).reshape(B_w, N_w, C)
+ attn = attn.softmax(dim=-1)
+ x = (attn @ v).view(B, self.num_heads, H, W,
+ -1).permute(0, 2, 3, 1, 4).reshape(B, H, W, -1)
x = self.proj(x)
- x = self.proj_drop(x)
-
- x = x.view(-1, self.window_size[1], self.window_size[0], C)
- x = window_reverse(x, self.window_size[0], Hp, Wp) # B H' W' C
-
- if pad_r > 0 or pad_b > 0:
- x = x[:, :H, :W, :].contiguous()
-
- x = x.view(B_, H * W, C)
return x
class Block(nn.Module):
+ """Transformer blocks with support of window attention and residual propagation blocks"""
- def __init__(self,
- dim,
- num_heads,
- mlp_ratio=4.,
- qkv_bias=False,
- qk_scale=None,
- drop=0.,
- attn_drop=0.,
- drop_path=0.,
- init_values=None,
- act_layer=nn.GELU,
- norm_layer=nn.LayerNorm,
- window_size=None,
- attn_head_dim=None,
- window=False,
- aggregation='attn'):
+ def __init__(
+ self,
+ dim,
+ num_heads,
+ mlp_ratio=4.0,
+ qkv_bias=True,
+ drop_path=0.0,
+ norm_layer=nn.LayerNorm,
+ act_layer=nn.GELU,
+ use_rel_pos=False,
+ rel_pos_zero_init=True,
+ window_size=0,
+ use_residual_block=False,
+ input_size=None,
+ ):
+ """
+ Args:
+ dim (int): Number of input channels.
+ num_heads (int): Number of attention heads in each ViT block.
+ mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
+ qkv_bias (bool): If True, add a learnable bias to query, key, value.
+ drop_path (float): Stochastic depth rate.
+ norm_layer (nn.Module): Normalization layer.
+ act_layer (nn.Module): Activation layer.
+ use_rel_pos (bool): If True, add relative positional embeddings to the attention map.
+ rel_pos_zero_init (bool): If True, zero initialize relative positional parameters.
+ window_size (int): Window size for window attention blocks. If it equals 0, then not
+ use window attention.
+ use_residual_block (bool): If True, use a residual block after the MLP block.
+ input_size (int or None): Input resolution for calculating the relative positional
+ parameter size.
+ """
super().__init__()
self.norm1 = norm_layer(dim)
- self.aggregation = aggregation
- self.window = window
- if not window:
- if aggregation == 'attn':
- self.attn = Attention(
- dim,
- num_heads=num_heads,
- qkv_bias=qkv_bias,
- qk_scale=qk_scale,
- attn_drop=attn_drop,
- proj_drop=drop,
- window_size=window_size,
- attn_head_dim=attn_head_dim)
- else:
- self.attn = WindowAttention(
- dim,
- num_heads=num_heads,
- qkv_bias=qkv_bias,
- qk_scale=qk_scale,
- attn_drop=attn_drop,
- proj_drop=drop,
- window_size=window_size,
- attn_head_dim=attn_head_dim)
- if aggregation == 'basicblock':
- self.conv_aggregation = BasicBlock(
- inplanes=dim, planes=dim)
- elif aggregation == 'bottleneck':
- self.conv_aggregation = Bottleneck(
- inplanes=dim, planes=dim // 4)
- else:
- self.attn = WindowAttention(
- dim,
- num_heads=num_heads,
- qkv_bias=qkv_bias,
- qk_scale=qk_scale,
- attn_drop=attn_drop,
- proj_drop=drop,
- window_size=window_size,
- attn_head_dim=attn_head_dim)
- # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
+ self.attn = Attention(
+ dim,
+ num_heads=num_heads,
+ qkv_bias=qkv_bias,
+ use_rel_pos=use_rel_pos,
+ rel_pos_zero_init=rel_pos_zero_init,
+ input_size=input_size if window_size == 0 else
+ (window_size, window_size),
+ )
+
self.drop_path = DropPath(
- drop_path) if drop_path > 0. else nn.Identity()
+ drop_path) if drop_path > 0.0 else nn.Identity()
self.norm2 = norm_layer(dim)
- mlp_hidden_dim = int(dim * mlp_ratio)
self.mlp = Mlp(
in_features=dim,
- hidden_features=mlp_hidden_dim,
- act_layer=act_layer,
- drop=drop)
+ hidden_features=int(dim * mlp_ratio),
+ act_layer=act_layer)
- if init_values is not None:
- self.gamma_1 = nn.Parameter(
- init_values * torch.ones((dim)), requires_grad=True)
- self.gamma_2 = nn.Parameter(
- init_values * torch.ones((dim)), requires_grad=True)
- else:
- self.gamma_1, self.gamma_2 = None, None
+ self.window_size = window_size
- def forward(self, x, H, W):
- if self.gamma_1 is None:
- x = x + self.drop_path(self.attn(self.norm1(x), H, W))
- x = x + self.drop_path(self.mlp(self.norm2(x)))
- else:
- x = x + self.drop_path(
- self.gamma_1 * self.attn(self.norm1(x), H, W))
- x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x)))
- if not self.window and self.aggregation != 'attn':
- x = self.conv_aggregation(x, H, W)
- return x
-
-
-class PatchEmbed(nn.Module):
- """ Image to Patch Embedding
- """
-
- def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
- super().__init__()
- img_size = to_2tuple(img_size)
- patch_size = to_2tuple(patch_size)
- num_patches = (img_size[1] // patch_size[1]) * (
- img_size[0] // patch_size[0])
- self.patch_shape = (img_size[0] // patch_size[0],
- img_size[1] // patch_size[1])
- self.img_size = img_size
- self.patch_size = patch_size
- self.num_patches = num_patches
-
- self.proj = nn.Conv2d(
- in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
-
- def forward(self, x, **kwargs):
- B, C, H, W = x.shape
- # FIXME look at relaxing size constraints
- # assert H == self.img_size[0] and W == self.img_size[1], \
- # f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
- x = self.proj(x)
- Hp, Wp = x.shape[2], x.shape[3]
-
- x = x.flatten(2).transpose(1, 2)
- return x, (Hp, Wp)
-
-
-class HybridEmbed(nn.Module):
- """ CNN Feature Map Embedding
- Extract feature map from CNN, flatten, project to embedding dim.
- """
-
- def __init__(self,
- backbone,
- img_size=224,
- feature_size=None,
- in_chans=3,
- embed_dim=768):
- super().__init__()
- assert isinstance(backbone, nn.Module)
- img_size = to_2tuple(img_size)
- self.img_size = img_size
- self.backbone = backbone
- if feature_size is None:
- with torch.no_grad():
- # FIXME this is hacky, but most reliable way of determining the exact dim of the output feature
- # map for all networks, the feature metadata has reliable channel and stride info, but using
- # stride to calc feature dim requires info about padding of each stage that isn't captured.
- training = backbone.training
- if training:
- backbone.eval()
- o = self.backbone(
- torch.zeros(1, in_chans, img_size[0], img_size[1]))[-1]
- feature_size = o.shape[-2:]
- feature_dim = o.shape[1]
- backbone.train(training)
- else:
- feature_size = to_2tuple(feature_size)
- feature_dim = self.backbone.feature_info.channels()[-1]
- self.num_patches = feature_size[0] * feature_size[1]
- self.proj = nn.Linear(feature_dim, embed_dim)
+ self.use_residual_block = use_residual_block
def forward(self, x):
- x = self.backbone(x)[-1]
- x = x.flatten(2).transpose(1, 2)
- x = self.proj(x)
+ shortcut = x
+ x = self.norm1(x)
+ # Window partition
+ if self.window_size > 0:
+ H, W = x.shape[1], x.shape[2]
+ x, pad_hw = window_partition(x, self.window_size)
+
+ x = self.attn(x)
+ # Reverse window partition
+ if self.window_size > 0:
+ x = window_unpartition(x, self.window_size, pad_hw, (H, W))
+
+ x = shortcut + self.drop_path(x)
+ x = x + self.drop_path(self.mlp(self.norm2(x)))
+
+ if self.use_residual_block:
+ x = self.residual(x.permute(0, 3, 1, 2)).permute(0, 2, 3, 1)
+
return x
-class Norm2d(nn.Module):
-
- def __init__(self, embed_dim):
- super().__init__()
- self.ln = nn.LayerNorm(embed_dim, eps=1e-6)
-
- def forward(self, x):
- x = x.permute(0, 2, 3, 1)
- x = self.ln(x)
- x = x.permute(0, 3, 1, 2).contiguous()
- return x
-
-
-# todo: refactor vitdet and vit_transformer_dynamic
@BACKBONES.register_module()
class ViTDet(nn.Module):
- """ Vision Transformer with support for patch or hybrid CNN input stage
+ """
+ This module implements Vision Transformer (ViT) backbone in :paper:`vitdet`.
+ "Exploring Plain Vision Transformer Backbones for Object Detection",
+ https://arxiv.org/abs/2203.16527
"""
- def __init__(self,
- img_size=224,
- patch_size=16,
- in_chans=3,
- num_classes=80,
- embed_dim=768,
- depth=12,
- num_heads=12,
- mlp_ratio=4.,
- qkv_bias=False,
- qk_scale=None,
- drop_rate=0.,
- attn_drop_rate=0.,
- drop_path_rate=0.,
- hybrid_backbone=None,
- norm_layer=None,
- init_values=None,
- use_checkpoint=False,
- use_abs_pos_emb=False,
- use_rel_pos_bias=False,
- use_shared_rel_pos_bias=False,
- out_indices=[11],
- interval=3,
- pretrained=None,
- aggregation='attn'):
+ def __init__(
+ self,
+ img_size=1024,
+ patch_size=16,
+ in_chans=3,
+ embed_dim=768,
+ depth=12,
+ num_heads=12,
+ mlp_ratio=4.0,
+ qkv_bias=True,
+ drop_path_rate=0.0,
+ norm_layer=partial(nn.LayerNorm, eps=1e-6),
+ act_layer=nn.GELU,
+ use_abs_pos=True,
+ use_rel_pos=False,
+ rel_pos_zero_init=True,
+ window_size=0,
+ window_block_indexes=(),
+ residual_block_indexes=(),
+ use_act_checkpoint=False,
+ pretrain_img_size=224,
+ pretrain_use_cls_token=True,
+ pretrained=None,
+ ):
+ """
+ Args:
+ img_size (int): Input image size.
+ patch_size (int): Patch size.
+ in_chans (int): Number of input image channels.
+ embed_dim (int): Patch embedding dimension.
+ depth (int): Depth of ViT.
+ num_heads (int): Number of attention heads in each ViT block.
+ mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
+ qkv_bias (bool): If True, add a learnable bias to query, key, value.
+ drop_path_rate (float): Stochastic depth rate.
+ norm_layer (nn.Module): Normalization layer.
+ act_layer (nn.Module): Activation layer.
+ use_abs_pos (bool): If True, use absolute positional embeddings.
+ use_rel_pos (bool): If True, add relative positional embeddings to the attention map.
+ rel_pos_zero_init (bool): If True, zero initialize relative positional parameters.
+ window_size (int): Window size for window attention blocks.
+ window_block_indexes (list): Indexes for blocks using window attention.
+ residual_block_indexes (list): Indexes for blocks using conv propagation.
+ use_act_checkpoint (bool): If True, use activation checkpointing.
+ pretrain_img_size (int): input image size for pretraining models.
+            pretrain_use_cls_token (bool): If True, pretraining models use class token.
+ """
super().__init__()
- norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)
- self.num_classes = num_classes
- self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models
+ self.pretrain_use_cls_token = pretrain_use_cls_token
+ self.use_act_checkpoint = use_act_checkpoint
- if hybrid_backbone is not None:
- self.patch_embed = HybridEmbed(
- hybrid_backbone,
- img_size=img_size,
- in_chans=in_chans,
- embed_dim=embed_dim)
- else:
- self.patch_embed = PatchEmbed(
- img_size=img_size,
- patch_size=patch_size,
- in_chans=in_chans,
- embed_dim=embed_dim)
+ self.patch_embed = PatchEmbed(
+ kernel_size=(patch_size, patch_size),
+ stride=(patch_size, patch_size),
+ in_chans=in_chans,
+ embed_dim=embed_dim,
+ )
- num_patches = self.patch_embed.num_patches
-
- self.out_indices = out_indices
-
- if use_abs_pos_emb:
+ if use_abs_pos:
+ # Initialize absolute positional embedding with pretrain image size.
+ num_patches = (pretrain_img_size // patch_size) * (
+ pretrain_img_size // patch_size)
+ num_positions = (num_patches +
+ 1) if pretrain_use_cls_token else num_patches
self.pos_embed = nn.Parameter(
- torch.zeros(1, num_patches, embed_dim))
+ torch.zeros(1, num_positions, embed_dim))
else:
self.pos_embed = None
- self.pos_drop = nn.Dropout(p=drop_rate)
+ # stochastic depth decay rule
+ dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]
- dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)
- ] # stochastic depth decay rule
- self.use_rel_pos_bias = use_rel_pos_bias
- self.use_checkpoint = use_checkpoint
- self.blocks = nn.ModuleList([
- Block(
+ self.blocks = nn.ModuleList()
+ for i in range(depth):
+ block = Block(
dim=embed_dim,
num_heads=num_heads,
mlp_ratio=mlp_ratio,
qkv_bias=qkv_bias,
- qk_scale=qk_scale,
- drop=drop_rate,
- attn_drop=attn_drop_rate,
drop_path=dpr[i],
norm_layer=norm_layer,
- init_values=init_values,
- window_size=(14, 14) if
- ((i + 1) % interval != 0
- or aggregation != 'attn') else self.patch_embed.patch_shape,
- window=((i + 1) % interval != 0),
- aggregation=aggregation) for i in range(depth)
- ])
+ act_layer=act_layer,
+ use_rel_pos=use_rel_pos,
+ rel_pos_zero_init=rel_pos_zero_init,
+ window_size=window_size if i in window_block_indexes else 0,
+ use_residual_block=i in residual_block_indexes,
+ input_size=(img_size // patch_size, img_size // patch_size),
+ )
+ self.blocks.append(block)
if self.pos_embed is not None:
- trunc_normal_(self.pos_embed, std=.02)
-
- self.norm = norm_layer(embed_dim)
+ trunc_normal_(self.pos_embed, std=0.02)
+ self.apply(self._init_weights)
self.pretrained = pretrained
- self._register_load_state_dict_pre_hook(self._prepare_checkpoint_hook)
- def fix_init_weight(self):
-
- def rescale(param, layer_id):
- param.div_(math.sqrt(2.0 * layer_id))
-
- for layer_id, layer in enumerate(self.blocks):
- rescale(layer.attn.proj.weight.data, layer_id + 1)
- rescale(layer.mlp.fc2.weight.data, layer_id + 1)
-
- def init_weights(self, pretrained=None):
- """Initialize the weights in backbone.
- Args:
- pretrained (str, optional): Path to pre-trained weights.
- Defaults to None.
- """
- self.fix_init_weight()
- pretrained = pretrained or self.pretrained
-
- def _init_weights(m):
- if isinstance(m, nn.Linear):
- trunc_normal_(m.weight, std=.02)
- if isinstance(m, nn.Linear) and m.bias is not None:
- nn.init.constant_(m.bias, 0)
- elif isinstance(m, nn.LayerNorm):
+ def _init_weights(self, m):
+ if isinstance(m, nn.Linear):
+ trunc_normal_(m.weight, std=0.02)
+ if isinstance(m, nn.Linear) and m.bias is not None:
nn.init.constant_(m.bias, 0)
- nn.init.constant_(m.weight, 1.0)
+ elif isinstance(m, nn.LayerNorm):
+ nn.init.constant_(m.bias, 0)
+ nn.init.constant_(m.weight, 1.0)
- if isinstance(m, nn.Conv2d):
- kaiming_init(m, mode='fan_in', nonlinearity='relu')
- elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
- constant_init(m, 1)
-
- if isinstance(m, Bottleneck):
- constant_init(m.norm3, 0)
- elif isinstance(m, BasicBlock):
- constant_init(m.norm2, 0)
-
- if isinstance(pretrained, str):
- self.apply(_init_weights)
+ def init_weights(self):
+ if isinstance(self.pretrained, str):
logger = get_root_logger()
- load_checkpoint(self, pretrained, strict=False, logger=logger)
- elif pretrained is None:
- self.apply(_init_weights)
- else:
- raise TypeError('pretrained must be a str or None')
-
- def _prepare_checkpoint_hook(self, state_dict, prefix, *args, **kwargs):
- rank, _ = get_dist_info()
- if 'pos_embed' in state_dict:
- pos_embed_checkpoint = state_dict['pos_embed']
- embedding_size = pos_embed_checkpoint.shape[-1]
- H, W = self.patch_embed.patch_shape
- num_patches = self.patch_embed.num_patches
- num_extra_tokens = 1
- # height (== width) for the checkpoint position embedding
- orig_size = int(
- (pos_embed_checkpoint.shape[-2] - num_extra_tokens)**0.5)
- # height (== width) for the new position embedding
- new_size = int(num_patches**0.5)
- # class_token and dist_token are kept unchanged
- if orig_size != new_size:
- if rank == 0:
- print('Position interpolate from %dx%d to %dx%d' %
- (orig_size, orig_size, H, W))
- # extra_tokens = pos_embed_checkpoint[:, :num_extra_tokens]
- # only the position tokens are interpolated
- pos_tokens = pos_embed_checkpoint[:, num_extra_tokens:]
- pos_tokens = pos_tokens.reshape(-1, orig_size, orig_size,
- embedding_size).permute(
- 0, 3, 1, 2)
- pos_tokens = torch.nn.functional.interpolate(
- pos_tokens,
- size=(H, W),
- mode='bicubic',
- align_corners=False)
- new_pos_embed = pos_tokens.permute(0, 2, 3, 1).flatten(1, 2)
- # new_pos_embed = torch.cat((extra_tokens, pos_tokens), dim=1)
- state_dict['pos_embed'] = new_pos_embed
-
- def get_num_layers(self):
- return len(self.blocks)
-
- @torch.jit.ignore
- def no_weight_decay(self):
- return {'pos_embed', 'cls_token'}
-
- def forward_features(self, x):
- B, C, H, W = x.shape
- x, (Hp, Wp) = self.patch_embed(x)
- batch_size, seq_len, _ = x.size()
-
- if self.pos_embed is not None:
- x = x + self.pos_embed
- x = self.pos_drop(x)
-
- outs = []
- for i, blk in enumerate(self.blocks):
- if self.use_checkpoint:
- x = checkpoint.checkpoint(blk, x)
- else:
- x = blk(x, Hp, Wp)
-
- x = self.norm(x)
- xp = x.permute(0, 2, 1).reshape(B, -1, Hp, Wp)
-
- outs.append(xp)
-
- return tuple(outs)
+ load_checkpoint(self, self.pretrained, strict=False, logger=logger)
def forward(self, x):
- x = self.forward_features(x)
- return x
+ x = self.patch_embed(x)
+ if self.pos_embed is not None:
+ x = x + get_abs_pos(self.pos_embed, self.pretrain_use_cls_token,
+ (x.shape[1], x.shape[2]))
+
+ for blk in self.blocks:
+ if self.use_act_checkpoint:
+ x = checkpoint.checkpoint(blk, x)
+ else:
+ x = blk(x)
+
+ outputs = [x.permute(0, 3, 1, 2)]
+ return outputs
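A minimal smoke test of the refactored backbone (a sketch; the constructor arguments mirror vitdet_mask_rcnn.py but with img_size=224 so the dummy forward stays cheap). Unlike the old implementation, forward now returns a single stride-16 feature map; the multi-scale pyramid is produced by the SFP neck:

    import torch
    from easycv.models.backbones.vitdet import ViTDet

    backbone = ViTDet(
        img_size=224,
        patch_size=16,
        embed_dim=768,
        depth=12,
        num_heads=12,
        drop_path_rate=0.1,
        window_size=14,
        window_block_indexes=[0, 1, 3, 4, 6, 7, 9, 10],  # blocks 2, 5, 8, 11 stay global
        residual_block_indexes=[],
        use_rel_pos=True)
    backbone.eval()
    with torch.no_grad():
        feats = backbone(torch.randn(1, 3, 224, 224))
    print(len(feats), feats[0].shape)  # 1 torch.Size([1, 768, 14, 14]), i.e. stride 16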
diff --git a/easycv/models/detection/necks/fpn.py b/easycv/models/detection/necks/fpn.py
index 6d14bbef..8018903c 100644
--- a/easycv/models/detection/necks/fpn.py
+++ b/easycv/models/detection/necks/fpn.py
@@ -37,7 +37,6 @@ class FPN(nn.Module):
Default: None.
upsample_cfg (dict): Config dict for interpolate layer.
Default: dict(mode='nearest').
- init_cfg (dict or list[dict], optional): Initialization config dict.
Example:
>>> import torch
>>> in_channels = [2, 3, 5, 7]
@@ -67,8 +66,6 @@ class FPN(nn.Module):
norm_cfg=None,
act_cfg=None,
upsample_cfg=dict(mode='nearest')):
- # init_cfg=dict(
- # type='Xavier', layer='Conv2d', distribution='uniform')):
super(FPN, self).__init__()
assert isinstance(in_channels, list)
self.in_channels = in_channels
diff --git a/easycv/models/detection/necks/sfp.py b/easycv/models/detection/necks/sfp.py
index be1273b0..b588f643 100644
--- a/easycv/models/detection/necks/sfp.py
+++ b/easycv/models/detection/necks/sfp.py
@@ -2,26 +2,12 @@
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import ConvModule
-from mmcv.runner import BaseModule
from easycv.models.builder import NECKS
-class Norm2d(nn.Module):
-
- def __init__(self, embed_dim):
- super().__init__()
- self.ln = nn.LayerNorm(embed_dim, eps=1e-6)
-
- def forward(self, x):
- x = x.permute(0, 2, 3, 1)
- x = self.ln(x)
- x = x.permute(0, 3, 1, 2).contiguous()
- return x
-
-
@NECKS.register_module()
-class SFP(BaseModule):
+class SFP(nn.Module):
r"""Simple Feature Pyramid.
This is an implementation of paper `Exploring Plain Vision Transformer Backbones for Object Detection `_.
Args:
@@ -32,25 +18,12 @@ class SFP(BaseModule):
build the feature pyramid. Default: 0.
end_level (int): Index of the end input backbone level (exclusive) to
build the feature pyramid. Default: -1, which means the last level.
- add_extra_convs (bool | str): If bool, it decides whether to add conv
- layers on top of the original feature maps. Default to False.
- If True, it is equivalent to `add_extra_convs='on_input'`.
- If str, it specifies the source feature map of the extra convs.
- Only the following options are allowed
- - 'on_input': Last feat map of neck inputs (i.e. backbone feature).
- - 'on_lateral': Last feature map after lateral convs.
- - 'on_output': The last output feature map after fpn convs.
- relu_before_extra_convs (bool): Whether to apply relu before the extra
conv. Default: False.
- no_norm_on_lateral (bool): Whether to apply norm on lateral.
Default: False.
conv_cfg (dict): Config dict for convolution layer. Default: None.
norm_cfg (dict): Config dict for normalization layer. Default: None.
act_cfg (str): Config dict for activation layer in ConvModule.
Default: None.
- upsample_cfg (dict): Config dict for interpolate layer.
- Default: `dict(mode='nearest')`
- init_cfg (dict or list[dict], optional): Initialization config dict.
Example:
>>> import torch
>>> in_channels = [2, 3, 5, 7]
@@ -70,158 +43,83 @@ class SFP(BaseModule):
def __init__(self,
in_channels,
out_channels,
+ scale_factors,
num_outs,
- start_level=0,
- end_level=-1,
- add_extra_convs=False,
- relu_before_extra_convs=False,
- no_norm_on_lateral=False,
conv_cfg=None,
norm_cfg=None,
- act_cfg=None,
- upsample_cfg=dict(mode='nearest'),
- init_cfg=[
- dict(
- type='Xavier',
- layer=['Conv2d'],
- distribution='uniform'),
- dict(type='Constant', layer=['LayerNorm'], val=1, bias=0)
- ]):
- super(SFP, self).__init__(init_cfg)
- assert isinstance(in_channels, list)
- self.in_channels = in_channels
+ act_cfg=None):
+ super(SFP, self).__init__()
+ dim = in_channels
self.out_channels = out_channels
- self.num_ins = len(in_channels)
+ self.scale_factors = scale_factors
+ self.num_ins = len(scale_factors)
self.num_outs = num_outs
- self.relu_before_extra_convs = relu_before_extra_convs
- self.no_norm_on_lateral = no_norm_on_lateral
- self.upsample_cfg = upsample_cfg.copy()
- if end_level == -1:
- self.backbone_end_level = self.num_ins
- assert num_outs >= self.num_ins - start_level
- else:
- # if end_level < inputs, no extra level is allowed
- self.backbone_end_level = end_level
- assert end_level <= len(in_channels)
- assert num_outs == end_level - start_level
- self.start_level = start_level
- self.end_level = end_level
- self.add_extra_convs = add_extra_convs
- assert isinstance(add_extra_convs, (str, bool))
- if isinstance(add_extra_convs, str):
- # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output'
- assert add_extra_convs in ('on_input', 'on_lateral', 'on_output')
- elif add_extra_convs: # True
- self.add_extra_convs = 'on_input'
-
- self.top_downs = nn.ModuleList()
- self.lateral_convs = nn.ModuleList()
- self.fpn_convs = nn.ModuleList()
-
- for i in range(self.start_level, self.backbone_end_level):
- if i == 0:
- top_down = nn.Sequential(
+ self.stages = []
+ for idx, scale in enumerate(scale_factors):
+ out_dim = dim
+ if scale == 4.0:
+ layers = [
+ nn.ConvTranspose2d(dim, dim // 2, 2, stride=2, padding=0),
+ nn.GroupNorm(1, dim // 2, eps=1e-6),
+ nn.GELU(),
nn.ConvTranspose2d(
- in_channels[i], in_channels[i], 2, stride=2,
- padding=0), Norm2d(in_channels[i]), nn.GELU(),
- nn.ConvTranspose2d(
- in_channels[i], in_channels[i], 2, stride=2,
- padding=0))
- elif i == 1:
- top_down = nn.ConvTranspose2d(
- in_channels[i], in_channels[i], 2, stride=2, padding=0)
- elif i == 2:
- top_down = nn.Identity()
- elif i == 3:
- top_down = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
+ dim // 2, dim // 4, 2, stride=2, padding=0)
+ ]
+ out_dim = dim // 4
+ elif scale == 2.0:
+ layers = [
+ nn.ConvTranspose2d(dim, dim // 2, 2, stride=2, padding=0)
+ ]
+ out_dim = dim // 2
+ elif scale == 1.0:
+ layers = []
+ elif scale == 0.5:
+ layers = [nn.MaxPool2d(kernel_size=2, stride=2, padding=0)]
+ else:
+ raise NotImplementedError(
+ f'scale_factor={scale} is not supported yet.')
- l_conv = ConvModule(
- in_channels[i],
- out_channels,
- 1,
- conv_cfg=conv_cfg,
- norm_cfg=norm_cfg if not self.no_norm_on_lateral else None,
- act_cfg=act_cfg,
- inplace=False)
- fpn_conv = ConvModule(
- out_channels,
- out_channels,
- 3,
- padding=1,
- conv_cfg=conv_cfg,
- norm_cfg=norm_cfg,
- act_cfg=act_cfg,
- inplace=False)
-
- self.top_downs.append(top_down)
- self.lateral_convs.append(l_conv)
- self.fpn_convs.append(fpn_conv)
-
- # add extra conv layers (e.g., RetinaNet)
- extra_levels = num_outs - self.backbone_end_level + self.start_level
- if self.add_extra_convs and extra_levels >= 1:
- for i in range(extra_levels):
- if i == 0 and self.add_extra_convs == 'on_input':
- in_channels = self.in_channels[self.backbone_end_level - 1]
- else:
- in_channels = out_channels
- extra_fpn_conv = ConvModule(
- in_channels,
+ layers.extend([
+ ConvModule(
+ out_dim,
+ out_channels,
+ 1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg,
+ inplace=False),
+ ConvModule(
+ out_channels,
out_channels,
3,
- stride=2,
padding=1,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg,
inplace=False)
- self.fpn_convs.append(extra_fpn_conv)
+ ])
+
+ layers = nn.Sequential(*layers)
+ self.add_module(f'sfp_{idx}', layers)
+ self.stages.append(layers)
+
+ def init_weights(self):
+ pass
def forward(self, inputs):
"""Forward function."""
- assert len(inputs) == 1
+ features = inputs[0]
+ outs = []
- # build top-down path
- features = [
- top_down(inputs[0]) for _, top_down in enumerate(self.top_downs)
- ]
- assert len(features) == len(self.in_channels)
+ # part 1: build simple feature pyramid
+ for stage in self.stages:
+ outs.append(stage(features))
- # build laterals
- laterals = [
- lateral_conv(features[i + self.start_level])
- for i, lateral_conv in enumerate(self.lateral_convs)
- ]
-
- used_backbone_levels = len(laterals)
-
- # build outputs
- # part 1: from original levels
- outs = [
- self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels)
- ]
# part 2: add extra levels
- if self.num_outs > len(outs):
+ if self.num_outs > self.num_ins:
# use max pool to get more levels on top of outputs
# (e.g., Faster R-CNN, Mask R-CNN)
- if not self.add_extra_convs:
- for i in range(self.num_outs - used_backbone_levels):
- outs.append(F.max_pool2d(outs[-1], 1, stride=2))
- # add conv layers on top of original feature maps (RetinaNet)
- else:
- if self.add_extra_convs == 'on_input':
- extra_source = inputs[self.backbone_end_level - 1]
- elif self.add_extra_convs == 'on_lateral':
- extra_source = laterals[-1]
- elif self.add_extra_convs == 'on_output':
- extra_source = outs[-1]
- else:
- raise NotImplementedError
- outs.append(self.fpn_convs[used_backbone_levels](extra_source))
- for i in range(used_backbone_levels + 1, self.num_outs):
- if self.relu_before_extra_convs:
- outs.append(self.fpn_convs[i](F.relu(outs[-1])))
- else:
- outs.append(self.fpn_convs[i](outs[-1]))
+ for i in range(self.num_outs - self.num_ins):
+ outs.append(F.max_pool2d(outs[-1], 1, stride=2))
return tuple(outs)
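The rewritten SFP above replaces the FPN-style lateral/top-down machinery with one stage per entry in `scale_factors`, all fed from a single backbone feature map, plus optional extra levels via max pooling. A minimal usage sketch of the new interface (assuming the `easycv`/`mmcv` packages from this patch are importable; the shapes follow a ViT-Base/16 backbone on a 1024x1024 input, i.e. one 64x64 map with 768 channels):

```python
import torch
from easycv.models.detection.necks.sfp import SFP

# one stage per scale factor; num_outs=5 adds one extra max-pooled level on top
neck = SFP(
    in_channels=768,                     # embedding dim of the ViT backbone
    out_channels=256,
    scale_factors=[4.0, 2.0, 1.0, 0.5],
    num_outs=5)
neck.init_weights()                      # defined as a no-op in this patch

feat = torch.randn(1, 768, 64, 64)       # single-scale backbone output
outs = neck([feat])                      # forward expects a one-element sequence
print([tuple(o.shape) for o in outs])
# the four stages yield spatial sizes 256/128/64/32 (strides 4/8/16/32), all
# with 256 channels; the fifth level comes from F.max_pool2d (spatial size 16)
```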
diff --git a/easycv/predictors/detector.py b/easycv/predictors/detector.py
index f9d05992..017d671e 100644
--- a/easycv/predictors/detector.py
+++ b/easycv/predictors/detector.py
@@ -253,11 +253,11 @@ class DetrPredictor(PredictorInterface):
img,
bboxes,
labels=labels,
- colors='green',
- text_color='white',
- font_size=20,
- thickness=1,
- font_scale=0.5,
+ colors='cyan',
+ text_color='cyan',
+ font_size=18,
+ thickness=2,
+ font_scale=0.0,
show=show,
out_file=out_file)
diff --git a/tests/models/backbones/test_vitdet.py b/tests/models/backbones/test_vitdet.py
index 3f0350a2..82012aed 100644
--- a/tests/models/backbones/test_vitdet.py
+++ b/tests/models/backbones/test_vitdet.py
@@ -14,18 +14,27 @@ class ViTDetTest(unittest.TestCase):
def test_vitdet(self):
model = ViTDet(
img_size=1024,
+ patch_size=16,
embed_dim=768,
depth=12,
num_heads=12,
+ drop_path_rate=0.1,
+ window_size=14,
mlp_ratio=4,
qkv_bias=True,
- qk_scale=None,
- drop_rate=0.,
- attn_drop_rate=0.,
- drop_path_rate=0.1,
- use_abs_pos_emb=True,
- aggregation='attn',
- )
+ window_block_indexes=[
+                # 2, 5, 8, 11 for global attention
+ 0,
+ 1,
+ 3,
+ 4,
+ 6,
+ 7,
+ 9,
+ 10,
+ ],
+ residual_block_indexes=[],
+ use_rel_pos=True)
model.init_weights()
model.train()
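The updated backbone test now exercises ViTDet's windowed attention (blocks 2, 5, 8 and 11 keep global attention) and relative position embeddings. The remainder of the test is unchanged context and not shown in this hunk; a rough sketch of a forward pass with the same configuration, where the import path and the backbone's return type are assumptions rather than facts from the patch:

```python
import torch
from easycv.models.backbones import ViTDet  # import path is an assumption

model = ViTDet(
    img_size=1024,
    patch_size=16,
    embed_dim=768,
    depth=12,
    num_heads=12,
    drop_path_rate=0.1,
    window_size=14,
    mlp_ratio=4,
    qkv_bias=True,
    window_block_indexes=[0, 1, 3, 4, 6, 7, 9, 10],  # 2, 5, 8, 11 stay global
    residual_block_indexes=[],
    use_rel_pos=True)
model.init_weights()
model.eval()

with torch.no_grad():
    out = model(torch.randn(1, 3, 1024, 1024))
# ViT-Base/16 on a 1024x1024 input gives 64x64 patch tokens; whether `out` is a
# single tensor or a list of feature maps depends on the backbone's forward API.
```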
diff --git a/tests/predictors/test_detector.py b/tests/predictors/test_detector.py
index 9187d3a7..c3be2ed6 100644
--- a/tests/predictors/test_detector.py
+++ b/tests/predictors/test_detector.py
@@ -155,7 +155,7 @@ class DetectorTest(unittest.TestCase):
decimal=1)
def test_vitdet_detector(self):
- model_path = 'https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/vitdet/vit_base/vitdet_maskrcnn_export.pth'
+ model_path = 'https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/vitdet/vit_base/epoch_100_export.pth'
img = 'https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/demo/demo.jpg'
out_file = './result.jpg'
vitdet = DetrPredictor(model_path)
@@ -167,63 +167,170 @@ class DetectorTest(unittest.TestCase):
self.assertIn('detection_classes', output)
self.assertIn('detection_masks', output)
self.assertIn('img_metas', output)
- self.assertEqual(len(output['detection_boxes'][0]), 30)
- self.assertEqual(len(output['detection_scores'][0]), 30)
- self.assertEqual(len(output['detection_classes'][0]), 30)
+ self.assertEqual(len(output['detection_boxes'][0]), 33)
+ self.assertEqual(len(output['detection_scores'][0]), 33)
+ self.assertEqual(len(output['detection_classes'][0]), 33)
self.assertListEqual(
output['detection_classes'][0].tolist(),
np.array([
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 7, 7, 13, 13, 13, 56
+ 2, 2, 2, 2, 2, 2, 7, 7, 13, 13, 13, 56
],
dtype=np.int32).tolist())
assert_array_almost_equal(
output['detection_scores'][0],
np.array([
- 0.99791867, 0.99665856, 0.99480623, 0.99060905, 0.9882515,
- 0.98319584, 0.9738879, 0.97290784, 0.9514897, 0.95104814,
- 0.9321701, 0.86165, 0.8228847, 0.7623552, 0.76129806,
- 0.6050861, 0.44348577, 0.3452973, 0.2895671, 0.22109479,
- 0.21265312, 0.17855245, 0.1205352, 0.08981906, 0.10596471,
- 0.05854294, 0.99749386, 0.9472857, 0.5945908, 0.09855112
+ 0.9975854158401489, 0.9965696334838867, 0.9922919869422913,
+ 0.9833580851554871, 0.983080267906189, 0.970454752445221,
+ 0.9701289534568787, 0.9649872183799744, 0.9642795324325562,
+ 0.9642238020896912, 0.9529680609703064, 0.9403366446495056,
+ 0.9391788244247437, 0.8941807150840759, 0.8178097009658813,
+ 0.8013413548469543, 0.6677654385566711, 0.3952914774417877,
+ 0.33463895320892334, 0.32501447200775146, 0.27323535084724426,
+ 0.20197080075740814, 0.15607696771621704, 0.1068163588643074,
+ 0.10183875262737274, 0.09735643863677979, 0.06559795141220093,
+ 0.08890066295862198, 0.076363705098629, 0.9954648613929749,
+ 0.9212945699691772, 0.5224372148513794, 0.20555885136127472
],
dtype=np.float32),
decimal=2)
assert_array_almost_equal(
output['detection_boxes'][0],
- np.array([[294.7058, 117.29371, 378.83713, 149.99928],
- [609.05444, 112.526474, 633.2971, 136.35175],
- [481.4165, 110.987335, 522.5531, 130.01529],
- [167.68184, 109.89049, 215.49057, 139.86987],
- [374.75082, 110.68697, 433.10028, 136.23654],
- [189.54971, 110.09322, 297.6167, 155.77412],
- [266.5185, 105.37718, 326.54385, 127.916374],
- [556.30225, 110.43166, 592.8248, 128.03764],
- [432.49252, 105.086464, 484.0512, 132.272],
- [0., 110.566444, 62.01249, 146.44017],
- [591.74664, 110.43527, 619.73816, 126.68549],
- [99.126854, 90.947975, 118.46699, 101.11096],
- [59.895264, 94.110054, 85.60521, 106.67633],
- [142.95819, 96.61966, 165.96964, 104.95929],
- [83.062515, 89.802605, 99.1546, 98.69074],
- [226.28802, 98.32568, 249.06772, 108.86408],
- [136.67789, 94.75706, 154.62924, 104.289536],
- [170.42459, 98.458694, 183.16309, 106.203156],
- [67.56731, 89.68286, 82.62955, 98.35645],
- [222.80092, 97.828445, 239.02655, 108.29377],
- [134.34427, 92.31653, 149.19615, 102.97457],
- [613.5186, 102.27066, 636.0434, 112.813644],
- [607.4787, 110.87984, 630.1123, 127.65646],
- [135.13664, 90.989876, 155.67192, 100.18036],
- [431.61505, 105.43844, 484.36508, 132.50078],
- [189.92722, 110.38832, 297.74353, 155.95557],
- [220.67035, 177.13489, 455.32092, 380.45712],
- [372.76584, 134.33807, 432.44357, 188.51534],
- [50.403812, 110.543495, 70.4368, 119.65186],
- [373.50272, 134.27258, 432.18475, 187.81824]]),
+ np.array([[
+ 294.22674560546875, 116.6078109741211, 379.4328918457031,
+ 150.14097595214844
+ ],
+ [
+ 482.6017761230469, 110.75955963134766,
+ 522.8798828125, 129.71286010742188
+ ],
+ [
+ 167.06460571289062, 109.95974731445312,
+ 212.83975219726562, 140.16102600097656
+ ],
+ [
+ 609.2930908203125, 113.13909149169922,
+ 637.3115844726562, 136.4690704345703
+ ],
+ [
+ 191.185791015625, 111.1408920288086, 301.31689453125,
+ 155.7731170654297
+ ],
+ [
+ 431.2244873046875, 106.19962310791016,
+ 483.860595703125, 132.21627807617188
+ ],
+ [
+ 267.48358154296875, 105.5920639038086,
+ 325.2832336425781, 127.11176300048828
+ ],
+ [
+ 591.2138671875, 110.29329681396484,
+ 619.8524169921875, 126.1990966796875
+ ],
+ [
+ 0.0, 110.7026596069336, 61.487945556640625,
+ 146.33018493652344
+ ],
+ [
+ 555.9155883789062, 110.03486633300781,
+ 591.7050170898438, 127.06097412109375
+ ],
+ [
+ 60.24559783935547, 94.12760162353516,
+ 85.63741302490234, 106.66705322265625
+ ],
+ [
+ 99.02665710449219, 90.53657531738281,
+ 118.83953094482422, 101.18717956542969
+ ],
+ [
+ 396.30438232421875, 111.59194946289062,
+ 431.559814453125, 133.96914672851562
+ ],
+ [
+ 83.81543731689453, 89.65665435791016,
+ 99.9166259765625, 98.25627899169922
+ ],
+ [
+ 139.29647827148438, 96.68000793457031,
+ 165.22410583496094, 105.60000610351562
+ ],
+ [
+ 67.27152252197266, 89.42798614501953,
+ 83.25617980957031, 98.0460205078125
+ ],
+ [
+ 223.74176025390625, 98.68321990966797,
+ 250.42506408691406, 109.32588958740234
+ ],
+ [
+ 136.7582244873047, 96.51412963867188,
+ 152.51190185546875, 104.73160552978516
+ ],
+ [
+ 221.71812438964844, 97.86445617675781,
+ 238.9705810546875, 106.96803283691406
+ ],
+ [
+ 135.06964111328125, 91.80916595458984, 155.24609375,
+ 102.20686340332031
+ ],
+ [
+ 169.11180114746094, 97.53628540039062,
+ 182.88504028320312, 105.95404815673828
+ ],
+ [
+ 133.8811798095703, 91.00375366210938,
+ 145.35507202148438, 102.3780288696289
+ ],
+ [
+ 614.2507934570312, 102.19828796386719,
+ 636.5692749023438, 112.59198760986328
+ ],
+ [
+ 35.94759750366211, 91.7213363647461,
+ 70.38274383544922, 117.19855499267578
+ ],
+ [
+ 554.6401977539062, 115.18976593017578,
+ 562.0255737304688, 127.4429931640625
+ ],
+ [
+ 39.07550811767578, 92.73261260986328,
+ 85.36636352539062, 106.73953247070312
+ ],
+ [
+ 200.85513305664062, 93.00469970703125,
+ 219.73086547851562, 107.99642181396484
+ ],
+ [
+ 0.0, 111.18904876708984, 61.7393684387207,
+ 146.72547912597656
+ ],
+ [
+ 191.88568115234375, 111.09577178955078,
+ 299.4097900390625, 155.14639282226562
+ ],
+ [
+ 221.06834411621094, 176.6427001953125,
+ 458.3475341796875, 378.89300537109375
+ ],
+ [
+ 372.7131652832031, 135.51429748535156,
+ 433.2494201660156, 188.0106658935547
+ ],
+ [
+ 52.19819641113281, 110.3646011352539,
+ 70.95110321044922, 120.10567474365234
+ ],
+ [
+ 376.1671447753906, 133.6930694580078,
+ 432.2721862792969, 187.99481201171875
+ ]]),
decimal=1)
From ad78dfd3a1320dca8e523dc7abcd82b3aaaf8bfb Mon Sep 17 00:00:00 2001
From: zzoneee <55594658+zzoneee@users.noreply.github.com>
Date: Sat, 17 Sep 2022 11:21:47 +0800
Subject: [PATCH 5/9] fix DeiTIII cr bug (#196)
* fix DeiTIII CR bug and rename vit_transfomer_dynamic.py to vit_transformer_dynamic.py
---
...dino_deit_small_p16_8xb2048_20e_feature.py | 2 +-
...moby_deit_small_p16_8xb2048_30e_feature.py | 2 +-
docs/source/api/easycv.models.backbones.rst | 2 +-
.../backbones/pytorch_image_models_wrapper.py | 12 +++----
easycv/models/backbones/vision_transformer.py | 30 ++++++++----------
..._dynamic.py => vit_transformer_dynamic.py} | 31 +++++++++++++++++--
6 files changed, 50 insertions(+), 29 deletions(-)
rename easycv/models/backbones/{vit_transfomer_dynamic.py => vit_transformer_dynamic.py} (89%)
diff --git a/benchmarks/selfsup/classification/imagenet/dino_deit_small_p16_8xb2048_20e_feature.py b/benchmarks/selfsup/classification/imagenet/dino_deit_small_p16_8xb2048_20e_feature.py
index 2d58705e..0e6ef6bc 100644
--- a/benchmarks/selfsup/classification/imagenet/dino_deit_small_p16_8xb2048_20e_feature.py
+++ b/benchmarks/selfsup/classification/imagenet/dino_deit_small_p16_8xb2048_20e_feature.py
@@ -10,7 +10,7 @@ oss_io_config = dict(
buckets=['your oss buckets'])
# model settings
-# 1920: merge 4 layers of features, open models/backbones/vit_transfomer_dynamic.py:311: self.forward_return_n_last_blocks
+# 1920: merge 4 layers of features, open models/backbones/vit_transformer_dynamic.py:311: self.forward_return_n_last_blocks
# 384: default
feature_num = 1920
model = dict(
diff --git a/benchmarks/selfsup/classification/imagenet/moby_deit_small_p16_8xb2048_30e_feature.py b/benchmarks/selfsup/classification/imagenet/moby_deit_small_p16_8xb2048_30e_feature.py
index 4c81e184..dcb45d31 100644
--- a/benchmarks/selfsup/classification/imagenet/moby_deit_small_p16_8xb2048_30e_feature.py
+++ b/benchmarks/selfsup/classification/imagenet/moby_deit_small_p16_8xb2048_30e_feature.py
@@ -10,7 +10,7 @@ oss_io_config = dict(
buckets=['your oss buckets'])
# model settings
-# 1920: merge 4 layers of features, open models/backbones/vit_transfomer_dynamic.py:311: self.forward_return_n_last_blocks
+# 1920: merge 4 layers of features, open models/backbones/vit_transformer_dynamic.py:311: self.forward_return_n_last_blocks
# 384: default
feature_num = 1920
model = dict(
diff --git a/docs/source/api/easycv.models.backbones.rst b/docs/source/api/easycv.models.backbones.rst
index 3f1ecfd0..4c742c37 100644
--- a/docs/source/api/easycv.models.backbones.rst
+++ b/docs/source/api/easycv.models.backbones.rst
@@ -156,7 +156,7 @@ easycv.models.backbones.swin\_transformer\_dynamic module
easycv.models.backbones.vit\_transfomer\_dynamic module
-------------------------------------------------------
-.. automodule:: easycv.models.backbones.vit_transfomer_dynamic
+.. automodule:: easycv.models.backbones.vit_transformer_dynamic
:members:
:undoc-members:
:show-inheritance:
diff --git a/easycv/models/backbones/pytorch_image_models_wrapper.py b/easycv/models/backbones/pytorch_image_models_wrapper.py
index 6b141489..176d286f 100644
--- a/easycv/models/backbones/pytorch_image_models_wrapper.py
+++ b/easycv/models/backbones/pytorch_image_models_wrapper.py
@@ -16,11 +16,11 @@ from .shuffle_transformer import (shuffletrans_base_p4_w7_224,
from .swin_transformer_dynamic import (dynamic_swin_base_p4_w7_224,
dynamic_swin_small_p4_w7_224,
dynamic_swin_tiny_p4_w7_224)
-from .vit_transfomer_dynamic import (dynamic_deit_small_p16,
- dynamic_deit_tiny_p16,
- dynamic_vit_base_p16,
- dynamic_vit_huge_p14,
- dynamic_vit_large_p16)
+from .vit_transformer_dynamic import (dynamic_deit_small_p16,
+ dynamic_deit_tiny_p16,
+ dynamic_vit_base_p16,
+ dynamic_vit_huge_p14,
+ dynamic_vit_large_p16)
from .xcit_transformer import (xcit_large_24_p8, xcit_medium_24_p8,
xcit_medium_24_p16, xcit_small_12_p8,
xcit_small_12_p16)
@@ -36,7 +36,7 @@ _MODEL_MAP = {
'dynamic_swin_small_p4_w7_224': dynamic_swin_small_p4_w7_224,
'dynamic_swin_base_p4_w7_224': dynamic_swin_base_p4_w7_224,
- # vit_transfomer_dynamic
+ # vit_transformer_dynamic
'dynamic_deit_small_p16': dynamic_deit_small_p16,
'dynamic_deit_tiny_p16': dynamic_deit_tiny_p16,
'dynamic_vit_base_p16': dynamic_vit_base_p16,
diff --git a/easycv/models/backbones/vision_transformer.py b/easycv/models/backbones/vision_transformer.py
index 2061979d..79a9c900 100644
--- a/easycv/models/backbones/vision_transformer.py
+++ b/easycv/models/backbones/vision_transformer.py
@@ -4,12 +4,10 @@ Mostly copy-paste from timm library.
https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
"""
-import math
from functools import partial
import torch
import torch.nn as nn
-import torch.nn.functional as F
from timm.models.layers import trunc_normal_
from easycv.models.utils import DropPath, Mlp
@@ -163,8 +161,6 @@ class VisionTransformer(nn.Module):
forward layer. Default: 0.0
drop_path_rate (float): Stochastic depth rate. Default: 0
norm_layer (nn.Module): normalization layer
- use_dense_prediction (bool): If use_dense_prediction is True, the global
- pool and norm will before head will be removed.(if any) Default: False
global_pool (bool): Global pool before head. Default: False
use_layer_scale (bool): If use_layer_scale is True, it will use layer
scale. Default: False
@@ -188,7 +184,6 @@ class VisionTransformer(nn.Module):
attn_drop_rate=0.,
drop_path_rate=0.,
norm_layer=partial(nn.LayerNorm, eps=1e-6),
- use_dense_prediction=False,
global_pool=False,
use_layer_scale=False,
init_scale=1e-4,
@@ -196,6 +191,15 @@ class VisionTransformer(nn.Module):
super().__init__()
self.num_features = self.embed_dim = embed_dim
+ self.num_heads = num_heads
+ self.mlp_ratio = mlp_ratio
+ self.qkv_bias = qkv_bias
+ self.qk_scale = qk_scale
+ self.drop_rate = drop_rate
+ self.attn_drop_rate = attn_drop_rate
+ self.norm_layer = norm_layer
+ self.use_layer_scale = use_layer_scale
+ self.init_scale = init_scale
self.patch_embed = PatchEmbed(
img_size=img_size[0],
@@ -231,11 +235,6 @@ class VisionTransformer(nn.Module):
self.head = nn.Linear(
embed_dim, num_classes) if num_classes > 0 else nn.Identity()
- # Dense prediction head
- self.use_dense_prediction = use_dense_prediction
- if self.use_dense_prediction:
- self.head_dense = None
-
# Use global average pooling
self.global_pool = global_pool
if self.global_pool:
@@ -277,11 +276,8 @@ class VisionTransformer(nn.Module):
if self.norm is not None:
x = self.norm(x)
- if self.use_dense_prediction:
- return x[:, 0], x[:, 1:]
+ if self.global_pool:
+ x = x[:, 1:, :].mean(dim=1)
+ return self.fc_norm(x)
else:
- if self.global_pool:
- x = x[:, 1:, :].mean(dim=1)
- return self.fc_norm(x)
- else:
- return x[:, 0]
+ return x[:, 0]
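With `use_dense_prediction` moved out of the base class (it now lives in `DynamicVisionTransformer`, next file), the base forward keeps just two read-out modes: the class token, or global average pooling over the patch tokens followed by `fc_norm`. A tiny standalone sketch of those two branches (shapes assumed to be `[B, 1 + N, C]` with the class token at index 0):

```python
import torch

def readout(x, global_pool, fc_norm):
    """Mirrors the two branches above; x is [B, 1 + N, C], class token first."""
    if global_pool:
        return fc_norm(x[:, 1:, :].mean(dim=1))  # average patch tokens, then norm
    return x[:, 0]                               # plain class-token read-out

tokens = torch.randn(2, 197, 768)                # e.g. ViT-B/16 on a 224x224 input
print(readout(tokens, False, torch.nn.Identity()).shape)     # torch.Size([2, 768])
print(readout(tokens, True, torch.nn.LayerNorm(768)).shape)  # torch.Size([2, 768])
```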
diff --git a/easycv/models/backbones/vit_transfomer_dynamic.py b/easycv/models/backbones/vit_transformer_dynamic.py
similarity index 89%
rename from easycv/models/backbones/vit_transfomer_dynamic.py
rename to easycv/models/backbones/vit_transformer_dynamic.py
index 6df88d2c..3f8d0968 100644
--- a/easycv/models/backbones/vit_transfomer_dynamic.py
+++ b/easycv/models/backbones/vit_transformer_dynamic.py
@@ -13,13 +13,19 @@ from functools import partial
import torch
import torch.nn as nn
-from easycv.models.backbones.vision_transformer import VisionTransformer
+from easycv.models.backbones.vision_transformer import Block, VisionTransformer
class DynamicVisionTransformer(VisionTransformer):
- """Dynamic Vision Transformer """
+ """Dynamic Vision Transformer
- def __init__(self, **kwargs):
+ Args:
+        use_dense_prediction (bool): If use_dense_prediction is True, the global
+            pool and norm before the head will be removed (if any). Default: False
+
+ """
+
+ def __init__(self, use_dense_prediction=False, **kwargs):
super(DynamicVisionTransformer, self).__init__(**kwargs)
num_patches = self.patch_embed.num_patches
@@ -31,6 +37,25 @@ class DynamicVisionTransformer(VisionTransformer):
x.item()
for x in torch.linspace(0, self.drop_path_rate, self.depth)
]
+ self.blocks = nn.ModuleList([
+ Block(
+ dim=self.embed_dim,
+ num_heads=self.num_heads,
+ mlp_ratio=self.mlp_ratio,
+ qkv_bias=self.qkv_bias,
+ qk_scale=self.qk_scale,
+ drop=self.drop_rate,
+ attn_drop=self.attn_drop_rate,
+ drop_path=dpr[i],
+ norm_layer=self.norm_layer,
+ use_layer_scale=self.use_layer_scale,
+ init_values=self.init_scale) for i in range(self.depth)
+ ])
+
+ # Dense prediction head
+ self.use_dense_prediction = use_dense_prediction
+ if self.use_dense_prediction:
+ self.head_dense = None
def forward(self, x):
# convert to list
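Rebuilding `self.blocks` here lets the dynamic subclass reuse the hyper-parameters that the base class now stores on `self`, while re-applying the usual linear stochastic-depth schedule `dpr`. A short sketch of what that schedule evaluates to for, e.g., `drop_path_rate=0.1` and `depth=12` (illustrative values only):

```python
import torch

drop_path_rate, depth = 0.1, 12
dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]
print([round(v, 3) for v in dpr])
# [0.0, 0.009, 0.018, ..., 0.091, 0.1] -- deeper blocks drop their residual
# paths more often, i.e. the standard linearly increasing drop-path schedule
```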
From 5ac638175873a66551869fee5cce2b94575a6842 Mon Sep 17 00:00:00 2001
From: Cathy0908 <30484308+Cathy0908@users.noreply.github.com>
Date: Mon, 19 Sep 2022 16:07:04 +0800
Subject: [PATCH 6/9] add error code (#146)
* add error code
---
benchmarks/tools/extract.py | 1 +
benchmarks/tools/extract_backbone_weights.py | 4 +-
benchmarks/tools/linear_eval.py | 5 +-
easycv/apis/export.py | 1 +
easycv/apis/test.py | 1 +
easycv/core/evaluation/coco_evaluation.py | 3 +-
easycv/core/evaluation/keypoint_eval.py | 1 +
easycv/core/evaluation/metric_registry.py | 2 +
easycv/core/evaluation/segmentation_eval.py | 1 +
easycv/core/evaluation/top_down_eval.py | 1 +
easycv/core/optimizer/adam.py | 2 +
easycv/core/optimizer/lamb.py | 2 +
easycv/core/optimizer/lars.py | 2 +
easycv/core/optimizer/ranger.py | 2 +
easycv/core/sailfish/linear.py | 1 +
easycv/core/sailfish/util.py | 1 +
easycv/core/visualization/image.py | 6 +-
.../classification/data_sources/image_list.py | 5 +-
easycv/datasets/classification/odps.py | 1 +
.../classification/pipelines/auto_augment.py | 1 +
.../datasets/detection/data_sources/base.py | 1 +
.../datasets/detection/data_sources/coco.py | 1 +
.../detection/data_sources/coco_panoptic.py | 1 +
.../detection/data_sources/pai_format.py | 1 +
easycv/datasets/detection/mix.py | 1 +
.../detection/pipelines/mm_transforms.py | 11 +-
easycv/datasets/detection/raw.py | 3 +-
easycv/datasets/loader/build_loader.py | 1 +
easycv/datasets/loader/sampler.py | 6 +-
easycv/datasets/pose/data_sources/coco.py | 1 +
easycv/datasets/pose/data_sources/top_down.py | 1 +
.../pose/hand_coco_wholebody_dataset.py | 1 +
easycv/datasets/pose/pipelines/transforms.py | 1 +
easycv/datasets/pose/top_down.py | 1 +
.../segmentation/data_sources/base.py | 1 +
.../selfsup/data_sources/image_list.py | 1 +
easycv/datasets/shared/multi_view.py | 1 +
easycv/datasets/shared/pipelines/format.py | 1 +
.../datasets/shared/pipelines/transforms.py | 1 +
easycv/datasets/shared/raw.py | 1 +
easycv/file/base.py | 2 +
easycv/file/file_io.py | 4 +-
easycv/file/image.py | 3 +-
easycv/file/utils.py | 2 +
easycv/framework/__init__.py | 0
easycv/framework/errors.py | 128 ++++++++++++++++++
easycv/hooks/eval_hook.py | 1 +
easycv/hooks/extractor.py | 1 +
easycv/hooks/optimizer_hook.py | 3 +-
easycv/models/backbones/hrnet.py | 1 +
easycv/models/backbones/lighthrnet.py | 1 +
easycv/models/backbones/mit.py | 1 +
easycv/models/backbones/mobilenetv2.py | 1 +
.../backbones/pytorch_image_models_wrapper.py | 1 +
easycv/models/backbones/resnest.py | 1 +
easycv/models/backbones/resnet.py | 1 +
easycv/models/backbones/resnet_jit.py | 1 +
.../models/backbones/shuffle_transformer.py | 1 +
easycv/models/backbones/xcit_transformer.py | 3 +-
easycv/models/base.py | 2 +
.../models/classification/classification.py | 3 +-
.../detection/detectors/dab_detr/attention.py | 2 +
.../dab_detr/dab_detr_transformer.py | 1 +
.../models/detection/detectors/detection.py | 1 +
.../detectors/dino/deformable_transformer.py | 1 +
.../detection/detectors/dino/dino_head.py | 1 +
.../models/detection/detectors/yolox/asff.py | 1 +
.../detectors/yolox/yolo_head_template.py | 1 +
easycv/models/detection/necks/fpn.py | 1 +
easycv/models/detection/necks/sfp.py | 1 +
easycv/models/detection/utils/misc.py | 2 +
easycv/models/loss/cross_entropy_loss.py | 1 +
easycv/models/loss/focal_loss.py | 1 +
easycv/models/loss/iou_loss.py | 1 +
easycv/models/loss/utils.py | 2 +
.../pose/heads/topdown_heatmap_base_head.py | 1 +
.../pose/heads/topdown_heatmap_simple_head.py | 1 +
easycv/models/segmentation/encoder_decoder.py | 1 +
easycv/models/segmentation/heads/base.py | 1 +
.../segmentation/heads/transformer_decoder.py | 2 +
easycv/models/segmentation/utils/criterion.py | 1 +
easycv/models/selfsup/byol.py | 3 +-
easycv/models/selfsup/dino.py | 3 +-
easycv/models/selfsup/mae.py | 3 +-
easycv/models/selfsup/moby.py | 7 +-
easycv/models/selfsup/moco.py | 7 +-
easycv/models/selfsup/simclr.py | 3 +-
easycv/models/selfsup/swav.py | 7 +-
easycv/models/utils/conv_module.py | 1 +
easycv/models/utils/norm.py | 2 +
easycv/models/utils/transformer.py | 2 +
easycv/predictors/base.py | 1 +
easycv/predictors/classifier.py | 3 +-
easycv/predictors/detector.py | 1 +
easycv/predictors/feature_extractor.py | 3 +-
easycv/predictors/pose_predictor.py | 1 +
easycv/runner/ev_runner.py | 1 +
easycv/toolkit/blade/cv_blade_utils.py | 6 +-
easycv/toolkit/prune/prune_utils.py | 4 +-
easycv/toolkit/quantize/quantize_utils.py | 1 +
easycv/toolkit/torchacc/convert_ops.py | 2 +
easycv/utils/checkpoint.py | 1 +
easycv/utils/collect.py | 1 +
easycv/utils/config_tools.py | 5 +-
easycv/utils/json_utils.py | 2 +
easycv/utils/logger.py | 2 +
easycv/utils/mmlab_utils.py | 3 +-
easycv/utils/registry.py | 2 +
easycv/utils/test_util.py | 1 +
easycv/utils/user_config_params_utils.py | 2 +
tests/core/evaluation/test_coco_evaluation.py | 1 +
tests/core/optimizer/test_optimizers.py | 2 +
.../data_sources/test_det_voc_datasource.py | 3 +-
tests/framework/__init__.py | 0
tests/framework/test_errors.py | 52 +++++++
tests/utils/test_json_utils.py | 1 +
tools/eval.py | 4 +-
117 files changed, 367 insertions(+), 41 deletions(-)
create mode 100644 easycv/framework/__init__.py
create mode 100644 easycv/framework/errors.py
create mode 100644 tests/framework/__init__.py
create mode 100644 tests/framework/test_errors.py
diff --git a/benchmarks/tools/extract.py b/benchmarks/tools/extract.py
index 1aff8fa6..9214a282 100644
--- a/benchmarks/tools/extract.py
+++ b/benchmarks/tools/extract.py
@@ -15,6 +15,7 @@ from mmcv.runner import get_dist_info, init_dist, load_checkpoint
from easycv.apis import set_random_seed
from easycv.datasets import build_dataloader, build_dataset
from easycv.file import io
+from easycv.framework.errors import ValueError
from easycv.models import build_model
from easycv.utils.collect import dist_forward_collect, nondist_forward_collect
from easycv.utils.config_tools import mmcv_config_fromfile
diff --git a/benchmarks/tools/extract_backbone_weights.py b/benchmarks/tools/extract_backbone_weights.py
index d3eb38f1..976611ee 100644
--- a/benchmarks/tools/extract_backbone_weights.py
+++ b/benchmarks/tools/extract_backbone_weights.py
@@ -3,6 +3,8 @@ import argparse
import torch
+from easycv.framework.errors import ValueError
+
def parse_args():
parser = argparse.ArgumentParser(
@@ -24,7 +26,7 @@ def main():
output_dict['state_dict'][key[9:]] = value
has_backbone = True
if not has_backbone:
- raise Exception('Cannot find a backbone module in the checkpoint.')
+ raise ValueError('Cannot find a backbone module in the checkpoint.')
torch.save(output_dict, args.output)
diff --git a/benchmarks/tools/linear_eval.py b/benchmarks/tools/linear_eval.py
index 2f74e191..a892e4bf 100644
--- a/benchmarks/tools/linear_eval.py
+++ b/benchmarks/tools/linear_eval.py
@@ -2,11 +2,12 @@
import argparse
import os
import shutil
-import sys
import time
import torch
+from easycv.framework.errors import ValueError
+
args = argparse.ArgumentParser(description='Process some integers.')
args.add_argument(
'model_path',
@@ -88,7 +89,7 @@ def extract_model(model_path):
output_dict['state_dict'][key[9:]] = value
has_backbone = True
if not has_backbone:
- raise Exception('Cannot find a backbone module in the checkpoint.')
+ raise ValueError('Cannot find a backbone module in the checkpoint.')
torch.save(output_dict, backbone_file)
return backbone_file
diff --git a/easycv/apis/export.py b/easycv/apis/export.py
index fe8a1850..c2633acb 100644
--- a/easycv/apis/export.py
+++ b/easycv/apis/export.py
@@ -13,6 +13,7 @@ import torchvision.transforms.functional as t_f
from mmcv.utils import Config
from easycv.file import io
+from easycv.framework.errors import ValueError
from easycv.models import (DINO, MOCO, SWAV, YOLOX, Classification, MoBY,
build_model)
from easycv.utils.checkpoint import load_checkpoint
diff --git a/easycv/apis/test.py b/easycv/apis/test.py
index 7d3e3dda..d27a0291 100644
--- a/easycv/apis/test.py
+++ b/easycv/apis/test.py
@@ -15,6 +15,7 @@ from mmcv.parallel import (MMDataParallel, MMDistributedDataParallel,
from mmcv.runner import get_dist_info
from easycv.file import io
+from easycv.framework.errors import ValueError
from easycv.utils.torchacc_util import is_torchacc_enabled
diff --git a/easycv/core/evaluation/coco_evaluation.py b/easycv/core/evaluation/coco_evaluation.py
index fe5cc075..63891626 100644
--- a/easycv/core/evaluation/coco_evaluation.py
+++ b/easycv/core/evaluation/coco_evaluation.py
@@ -31,6 +31,7 @@ from easycv.core import standard_fields
from easycv.core.evaluation import coco_tools
from easycv.core.post_processing.nms import oks_nms, soft_oks_nms
from easycv.core.standard_fields import DetectionResultFields, InputDataFields
+from easycv.framework.errors import KeyError, TypeError, ValueError
from easycv.utils.json_utils import MyEncoder
from .base_evaluator import Evaluator
from .builder import EVALUATORS
@@ -365,7 +366,7 @@ class CocoDetectionEvaluator(Evaluator):
def _check_mask_type_and_value(array_name, masks):
"""Checks whether mask dtype is uint8 and the values are either 0 or 1."""
if masks.dtype != np.uint8:
- raise ValueError('{} must be of type np.uint8. Found {}.'.format(
+ raise TypeError('{} must be of type np.uint8. Found {}.'.format(
array_name, masks.dtype))
if np.any(np.logical_and(masks != 0, masks != 1)):
raise ValueError(
diff --git a/easycv/core/evaluation/keypoint_eval.py b/easycv/core/evaluation/keypoint_eval.py
index 0549a71f..4ab4f0c6 100644
--- a/easycv/core/evaluation/keypoint_eval.py
+++ b/easycv/core/evaluation/keypoint_eval.py
@@ -3,6 +3,7 @@
# https://github.com/open-mmlab/mmpose/blob/master/mmpose/datasets/datasets/base/kpt_2d_sview_rgb_img_top_down_dataset.py
import numpy as np
+from easycv.framework.errors import KeyError
from .base_evaluator import Evaluator
from .builder import EVALUATORS
from .metric_registry import METRICS
diff --git a/easycv/core/evaluation/metric_registry.py b/easycv/core/evaluation/metric_registry.py
index 35f89f5d..5c2f3e0e 100644
--- a/easycv/core/evaluation/metric_registry.py
+++ b/easycv/core/evaluation/metric_registry.py
@@ -1,6 +1,8 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import inspect
+from easycv.framework.errors import KeyError, TypeError
+
class MetricRegistry(object):
diff --git a/easycv/core/evaluation/segmentation_eval.py b/easycv/core/evaluation/segmentation_eval.py
index d76d4d66..81cbd82f 100644
--- a/easycv/core/evaluation/segmentation_eval.py
+++ b/easycv/core/evaluation/segmentation_eval.py
@@ -5,6 +5,7 @@ import numpy as np
import torch
from prettytable import PrettyTable
+from easycv.framework.errors import KeyError
from easycv.utils.logger import print_log
from .base_evaluator import Evaluator
from .builder import EVALUATORS
diff --git a/easycv/core/evaluation/top_down_eval.py b/easycv/core/evaluation/top_down_eval.py
index ebb505e8..47a4f2dc 100644
--- a/easycv/core/evaluation/top_down_eval.py
+++ b/easycv/core/evaluation/top_down_eval.py
@@ -6,6 +6,7 @@ import cv2
import numpy as np
from easycv.core.post_processing import transform_preds
+from easycv.framework.errors import ValueError
def _calc_distances(preds, targets, mask, normalize):
diff --git a/easycv/core/optimizer/adam.py b/easycv/core/optimizer/adam.py
index e015d523..f1bebd9d 100644
--- a/easycv/core/optimizer/adam.py
+++ b/easycv/core/optimizer/adam.py
@@ -8,6 +8,8 @@ from mmcv.runner.optimizer.builder import OPTIMIZERS
from torch import Tensor
from torch.optim import AdamW as _AdamW
+from easycv.framework.errors import RuntimeError
+
def adamw(params: List[Tensor], grads: List[Tensor], exp_avgs: List[Tensor],
exp_avg_sqs: List[Tensor], max_exp_avg_sqs: List[Tensor],
diff --git a/easycv/core/optimizer/lamb.py b/easycv/core/optimizer/lamb.py
index 6295cdc7..92a296fa 100644
--- a/easycv/core/optimizer/lamb.py
+++ b/easycv/core/optimizer/lamb.py
@@ -5,6 +5,8 @@ import torch
from mmcv.runner import OPTIMIZERS
from torch.optim import Optimizer
+from easycv.framework.errors import RuntimeError
+
@OPTIMIZERS.register_module()
class Lamb(Optimizer):
diff --git a/easycv/core/optimizer/lars.py b/easycv/core/optimizer/lars.py
index f6700bce..07d7d5f6 100644
--- a/easycv/core/optimizer/lars.py
+++ b/easycv/core/optimizer/lars.py
@@ -3,6 +3,8 @@ import torch
from torch.optim import * # noqa: F401,F403
from torch.optim.optimizer import Optimizer, required
+from easycv.framework.errors import ValueError
+
class LARS(Optimizer):
r"""Implements layer-wise adaptive rate scaling for SGD.
diff --git a/easycv/core/optimizer/ranger.py b/easycv/core/optimizer/ranger.py
index 727b6f0e..5ec04aac 100644
--- a/easycv/core/optimizer/ranger.py
+++ b/easycv/core/optimizer/ranger.py
@@ -4,6 +4,8 @@ import math
import torch
from torch.optim.optimizer import Optimizer
+from easycv.framework.errors import ValueError
+
def centralized_gradient(x, use_gc=True, gc_conv_only=False):
'''credit - https://github.com/Yonghongwei/Gradient-Centralization '''
diff --git a/easycv/core/sailfish/linear.py b/easycv/core/sailfish/linear.py
index 6386dab6..939de5b4 100644
--- a/easycv/core/sailfish/linear.py
+++ b/easycv/core/sailfish/linear.py
@@ -22,6 +22,7 @@ import torch
from easycv.core.sailfish.util import (BiasUniformInitializer,
KaimingUniformInitializer,
ModelParallel, RenormUniformInitializer)
+from easycv.framework.errors import ValueError
class Linear(torch.nn.Module):
diff --git a/easycv/core/sailfish/util.py b/easycv/core/sailfish/util.py
index 57155cbc..c54fd61d 100644
--- a/easycv/core/sailfish/util.py
+++ b/easycv/core/sailfish/util.py
@@ -25,6 +25,7 @@ from easycv.core.sailfish.function import (all_cat, all_log_softmax,
shard_correct_predictions,
shard_target_and_mask,
shard_topk_correct_predictions)
+from easycv.framework.errors import NotImplementedError, ValueError
class DistributedParallel:
diff --git a/easycv/core/visualization/image.py b/easycv/core/visualization/image.py
index 3b61f50b..9c79341c 100644
--- a/easycv/core/visualization/image.py
+++ b/easycv/core/visualization/image.py
@@ -10,6 +10,8 @@ import numpy as np
from mmcv.utils.misc import deprecated_api_warning
from PIL import Image, ImageDraw, ImageFont
+from easycv.framework.errors import FileNotFoundError
+
def get_font_path():
root_path = opd(opd(opd(os.path.realpath(__file__))))
@@ -22,8 +24,8 @@ def get_font_path():
elif os.path.exists(find_path_source):
return find_path_source
else:
- raise ValueError('Not find font file both in %s and %s' %
- (find_path_whl, find_path_source))
+        raise FileNotFoundError('Cannot find font file in either %s or %s' %
+ (find_path_whl, find_path_source))
_FONT_PATH = get_font_path()
diff --git a/easycv/datasets/classification/data_sources/image_list.py b/easycv/datasets/classification/data_sources/image_list.py
index 9835daa7..e37f9fa8 100644
--- a/easycv/datasets/classification/data_sources/image_list.py
+++ b/easycv/datasets/classification/data_sources/image_list.py
@@ -7,6 +7,7 @@ from PIL import Image, ImageFile
from easycv.datasets.registry import DATASOURCES
from easycv.file import io
+from easycv.framework.errors import TypeError
from easycv.utils.dist_utils import dist_zero_exec
from .utils import split_listfile_byrank
@@ -54,8 +55,8 @@ class ClsSourceImageList(object):
'list_file should be str or list(str)'
root = [root] if isinstance(root, str) else root
if not isinstance(root, list):
- raise ValueError('root must be str or list(str), but get %s' %
- type(root))
+ raise TypeError('root must be str or list(str), but get %s' %
+ type(root))
if len(root) < len(list_file):
logging.warning(
diff --git a/easycv/datasets/classification/odps.py b/easycv/datasets/classification/odps.py
index 24e24006..e8bf62f5 100644
--- a/easycv/datasets/classification/odps.py
+++ b/easycv/datasets/classification/odps.py
@@ -3,6 +3,7 @@ from PIL import Image
from easycv.datasets.registry import DATASETS
from easycv.datasets.shared.base import BaseDataset
+from easycv.framework.errors import NotImplementedError
@DATASETS.register_module
diff --git a/easycv/datasets/classification/pipelines/auto_augment.py b/easycv/datasets/classification/pipelines/auto_augment.py
index e9bef83a..84c8b3ae 100644
--- a/easycv/datasets/classification/pipelines/auto_augment.py
+++ b/easycv/datasets/classification/pipelines/auto_augment.py
@@ -12,6 +12,7 @@ from PIL import Image, ImageFilter
from easycv.datasets.registry import PIPELINES
from easycv.datasets.shared.pipelines import Compose
+from easycv.framework.errors import TypeError
# Default hyperparameters for all Ops
_HPARAMS_DEFAULT = dict(pad_val=128)
diff --git a/easycv/datasets/detection/data_sources/base.py b/easycv/datasets/detection/data_sources/base.py
index 60bf04a3..fb1677be 100644
--- a/easycv/datasets/detection/data_sources/base.py
+++ b/easycv/datasets/detection/data_sources/base.py
@@ -10,6 +10,7 @@ from mmcv.runner.dist_utils import get_dist_info
from tqdm import tqdm
from easycv.file.image import load_image
+from easycv.framework.errors import NotImplementedError, ValueError
def _load_image(img_path):
diff --git a/easycv/datasets/detection/data_sources/coco.py b/easycv/datasets/detection/data_sources/coco.py
index 1f1efca4..76709f32 100644
--- a/easycv/datasets/detection/data_sources/coco.py
+++ b/easycv/datasets/detection/data_sources/coco.py
@@ -4,6 +4,7 @@ from xtcocotools.coco import COCO
from easycv.datasets.registry import DATASOURCES, PIPELINES
from easycv.datasets.shared.pipelines import Compose
+from easycv.framework.errors import TypeError
from easycv.utils.registry import build_from_cfg
diff --git a/easycv/datasets/detection/data_sources/coco_panoptic.py b/easycv/datasets/detection/data_sources/coco_panoptic.py
index fcf2060a..9ee2ea96 100644
--- a/easycv/datasets/detection/data_sources/coco_panoptic.py
+++ b/easycv/datasets/detection/data_sources/coco_panoptic.py
@@ -8,6 +8,7 @@ from xtcocotools.coco import COCO
from easycv.datasets.detection.data_sources import DetSourceCoco
from easycv.datasets.registry import DATASOURCES, PIPELINES
from easycv.datasets.shared.pipelines import Compose
+from easycv.framework.errors import RuntimeError, TypeError
from easycv.utils.registry import build_from_cfg
try:
diff --git a/easycv/datasets/detection/data_sources/pai_format.py b/easycv/datasets/detection/data_sources/pai_format.py
index 6f390c86..8ad26e5a 100644
--- a/easycv/datasets/detection/data_sources/pai_format.py
+++ b/easycv/datasets/detection/data_sources/pai_format.py
@@ -8,6 +8,7 @@ import numpy as np
from easycv.datasets.detection.data_sources.base import DetSourceBase
from easycv.datasets.registry import DATASOURCES
from easycv.file import io
+from easycv.framework.errors import NotImplementedError, ValueError
def get_prior_task_id(keys):
diff --git a/easycv/datasets/detection/mix.py b/easycv/datasets/detection/mix.py
index 6e7d203f..19394908 100644
--- a/easycv/datasets/detection/mix.py
+++ b/easycv/datasets/detection/mix.py
@@ -9,6 +9,7 @@ import numpy as np
import torch
from easycv.datasets.registry import DATASETS, PIPELINES
+from easycv.framework.errors import TypeError
from easycv.utils.bbox_util import xyxy2xywh as xyxy2cxcywh
from easycv.utils.registry import build_from_cfg
from .raw import DetDataset
diff --git a/easycv/datasets/detection/pipelines/mm_transforms.py b/easycv/datasets/detection/pipelines/mm_transforms.py
index cd4257cc..0c5846ec 100644
--- a/easycv/datasets/detection/pipelines/mm_transforms.py
+++ b/easycv/datasets/detection/pipelines/mm_transforms.py
@@ -13,6 +13,7 @@ from torchvision.transforms import functional as F
from easycv.datasets.registry import PIPELINES
from easycv.datasets.shared.pipelines.transforms import Compose
+from easycv.framework.errors import KeyError, NotImplementedError, TypeError
try:
from panopticapi.utils import rgb2id
@@ -1122,8 +1123,8 @@ class MMRandomFlip:
elif flip_ratio is None:
pass
else:
- raise ValueError('flip_ratios must be None, float, '
- 'or list of float')
+ raise TypeError('flip_ratios must be None, float, '
+ 'or list of float')
self.flip_ratio = flip_ratio
valid_directions = ['horizontal', 'vertical', 'diagonal']
@@ -1133,7 +1134,7 @@ class MMRandomFlip:
assert mmcv.is_list_of(direction, str)
assert set(direction).issubset(set(valid_directions))
else:
- raise ValueError('direction must be either str or list of str')
+ raise TypeError('direction must be either str or list of str')
self.direction = direction
if isinstance(flip_ratio, list):
@@ -1168,7 +1169,7 @@ class MMRandomFlip:
flipped[..., 2::4] = w - bboxes[..., 0::4]
flipped[..., 3::4] = h - bboxes[..., 1::4]
else:
- raise ValueError(f"Invalid flipping direction '{direction}'")
+ raise KeyError(f"Invalid flipping direction '{direction}'")
return flipped
def __call__(self, results):
@@ -1274,7 +1275,7 @@ class MMRandomCrop:
if crop_type not in [
'relative_range', 'relative', 'absolute', 'absolute_range'
]:
- raise ValueError(f'Invalid crop_type {crop_type}.')
+ raise KeyError(f'Invalid crop_type {crop_type}.')
if crop_type in ['absolute', 'absolute_range']:
assert crop_size[0] > 0 and crop_size[1] > 0
assert isinstance(crop_size[0], int) and isinstance(
diff --git a/easycv/datasets/detection/raw.py b/easycv/datasets/detection/raw.py
index 49f6a3b1..3f6800a1 100644
--- a/easycv/datasets/detection/raw.py
+++ b/easycv/datasets/detection/raw.py
@@ -9,6 +9,7 @@ from easycv.datasets.detection.data_sources import DetSourceCoco
from easycv.datasets.registry import DATASETS
from easycv.datasets.shared.base import BaseDataset
from easycv.file.image import load_image
+from easycv.framework.errors import TimeoutError
@DATASETS.register_module
@@ -38,7 +39,7 @@ class DetDataset(BaseDataset):
count = 0
while True:
if count > 10:
- raise RuntimeError('Loops timeout')
+ raise TimeoutError('Loops timeout')
data_dict = self.data_source[idx]
data_dict = self.pipeline(data_dict)
if data_dict is None:
diff --git a/easycv/datasets/loader/build_loader.py b/easycv/datasets/loader/build_loader.py
index 08127325..4977553b 100644
--- a/easycv/datasets/loader/build_loader.py
+++ b/easycv/datasets/loader/build_loader.py
@@ -11,6 +11,7 @@ from mmcv.runner import get_dist_info
from torch.utils.data import DataLoader, RandomSampler
from easycv.datasets.shared.odps_reader import set_dataloader_workid
+from easycv.framework.errors import NotImplementedError
from easycv.utils.dist_utils import sync_random_seed
from easycv.utils.torchacc_util import is_torchacc_enabled
from .collate import CollateWrapper
diff --git a/easycv/datasets/loader/sampler.py b/easycv/datasets/loader/sampler.py
index 4c22695f..fd39d054 100644
--- a/easycv/datasets/loader/sampler.py
+++ b/easycv/datasets/loader/sampler.py
@@ -11,6 +11,8 @@ from mmcv.runner import get_dist_info
from torch.utils.data import DistributedSampler as _DistributedSampler
from torch.utils.data import Sampler
+from easycv.framework.errors import ValueError
+
class DistributedMPSampler(_DistributedSampler):
@@ -84,7 +86,9 @@ class DistributedMPSampler(_DistributedSampler):
self.label_list = []
if not self.dataset.data_source.has_labels:
- raise 'MPSampler need initial with classification datasets which has label!'
+ raise ValueError(
+                'MPSampler must be initialized with a classification dataset which has labels!'
+ )
for idx, label in enumerate(self.dataset.data_source.labels):
if label in self.label_dict.keys():
diff --git a/easycv/datasets/pose/data_sources/coco.py b/easycv/datasets/pose/data_sources/coco.py
index 86db5dd0..0f9e9260 100644
--- a/easycv/datasets/pose/data_sources/coco.py
+++ b/easycv/datasets/pose/data_sources/coco.py
@@ -7,6 +7,7 @@ import json_tricks as json
import numpy as np
from easycv.datasets.registry import DATASOURCES
+from easycv.framework.errors import ValueError
from .top_down import PoseTopDownSource
COCO_DATASET_INFO = dict(
diff --git a/easycv/datasets/pose/data_sources/top_down.py b/easycv/datasets/pose/data_sources/top_down.py
index f892bc4c..3f20d7b3 100644
--- a/easycv/datasets/pose/data_sources/top_down.py
+++ b/easycv/datasets/pose/data_sources/top_down.py
@@ -12,6 +12,7 @@ from mmcv.utils.path import is_filepath
from xtcocotools.coco import COCO
from easycv.datasets.registry import DATASOURCES
+from easycv.framework.errors import ValueError
class DatasetInfo:
diff --git a/easycv/datasets/pose/hand_coco_wholebody_dataset.py b/easycv/datasets/pose/hand_coco_wholebody_dataset.py
index 3084ba02..5cbd65a7 100644
--- a/easycv/datasets/pose/hand_coco_wholebody_dataset.py
+++ b/easycv/datasets/pose/hand_coco_wholebody_dataset.py
@@ -6,6 +6,7 @@ from easycv.core.evaluation.keypoint_eval import KeyPointEvaluator
from easycv.datasets.pose.data_sources.coco import PoseTopDownSource
from easycv.datasets.registry import DATASETS
from easycv.datasets.shared.base import BaseDataset
+from easycv.framework.errors import ValueError
@DATASETS.register_module()
diff --git a/easycv/datasets/pose/pipelines/transforms.py b/easycv/datasets/pose/pipelines/transforms.py
index 27c7c325..8401ee8f 100644
--- a/easycv/datasets/pose/pipelines/transforms.py
+++ b/easycv/datasets/pose/pipelines/transforms.py
@@ -9,6 +9,7 @@ from easycv.core.post_processing import (affine_transform, fliplr_joints,
get_affine_transform, get_warp_matrix,
warp_affine_joints)
from easycv.datasets.registry import PIPELINES
+from easycv.framework.errors import ValueError
@PIPELINES.register_module()
diff --git a/easycv/datasets/pose/top_down.py b/easycv/datasets/pose/top_down.py
index 3d972208..1946a654 100644
--- a/easycv/datasets/pose/top_down.py
+++ b/easycv/datasets/pose/top_down.py
@@ -3,6 +3,7 @@ from easycv.core.evaluation.coco_evaluation import CoCoPoseTopDownEvaluator
from easycv.datasets.pose.data_sources.coco import PoseTopDownSource
from easycv.datasets.registry import DATASETS
from easycv.datasets.shared.base import BaseDataset
+from easycv.framework.errors import ValueError
@DATASETS.register_module()
diff --git a/easycv/datasets/segmentation/data_sources/base.py b/easycv/datasets/segmentation/data_sources/base.py
index 888fd477..b8dc3673 100644
--- a/easycv/datasets/segmentation/data_sources/base.py
+++ b/easycv/datasets/segmentation/data_sources/base.py
@@ -12,6 +12,7 @@ from tqdm import tqdm
from easycv.datasets.registry import DATASOURCES
from easycv.file.image import load_image as _load_img
+from easycv.framework.errors import NotImplementedError, ValueError
def load_image(img_path):
diff --git a/easycv/datasets/selfsup/data_sources/image_list.py b/easycv/datasets/selfsup/data_sources/image_list.py
index fa61de5d..93637b57 100644
--- a/easycv/datasets/selfsup/data_sources/image_list.py
+++ b/easycv/datasets/selfsup/data_sources/image_list.py
@@ -7,6 +7,7 @@ from PIL import Image, ImageFile
from easycv.datasets.registry import DATASOURCES
from easycv.file import io
+from easycv.framework.errors import ValueError
@DATASOURCES.register_module
diff --git a/easycv/datasets/shared/multi_view.py b/easycv/datasets/shared/multi_view.py
index 7c96d9d6..b5bbd43f 100644
--- a/easycv/datasets/shared/multi_view.py
+++ b/easycv/datasets/shared/multi_view.py
@@ -7,6 +7,7 @@ from easycv.datasets.builder import build_datasource
from easycv.datasets.registry import DATASETS, PIPELINES
from easycv.datasets.shared.base import BaseDataset
from easycv.datasets.shared.pipelines.transforms import Compose
+from easycv.framework.errors import NotImplementedError
from easycv.utils.registry import build_from_cfg
diff --git a/easycv/datasets/shared/pipelines/format.py b/easycv/datasets/shared/pipelines/format.py
index 22a463e2..d46e0c34 100644
--- a/easycv/datasets/shared/pipelines/format.py
+++ b/easycv/datasets/shared/pipelines/format.py
@@ -7,6 +7,7 @@ import torch
from mmcv.parallel import DataContainer as DC
from easycv.datasets.registry import PIPELINES
+from easycv.framework.errors import TypeError
def to_tensor(data):
diff --git a/easycv/datasets/shared/pipelines/transforms.py b/easycv/datasets/shared/pipelines/transforms.py
index 7de5c0a9..31e4a966 100644
--- a/easycv/datasets/shared/pipelines/transforms.py
+++ b/easycv/datasets/shared/pipelines/transforms.py
@@ -6,6 +6,7 @@ import numpy as np
from easycv.datasets.registry import PIPELINES
from easycv.file.image import load_image
+from easycv.framework.errors import TypeError
from easycv.utils.registry import build_from_cfg
diff --git a/easycv/datasets/shared/raw.py b/easycv/datasets/shared/raw.py
index ed30275c..10f5e0a3 100644
--- a/easycv/datasets/shared/raw.py
+++ b/easycv/datasets/shared/raw.py
@@ -1,5 +1,6 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from easycv.datasets.registry import DATASETS
+from easycv.framework.errors import NotImplementedError
from .base import BaseDataset
diff --git a/easycv/file/base.py b/easycv/file/base.py
index 291e219c..47ee5d73 100644
--- a/easycv/file/base.py
+++ b/easycv/file/base.py
@@ -9,6 +9,8 @@ from datetime import datetime
from functools import lru_cache
from typing import List, Union
+from easycv.framework.errors import NotImplementedError
+
class IOBase:
diff --git a/easycv/file/file_io.py b/easycv/file/file_io.py
index 04e13743..ac3f5d28 100644
--- a/easycv/file/file_io.py
+++ b/easycv/file/file_io.py
@@ -11,6 +11,8 @@ from typing import List, Union
from tqdm import tqdm
from tqdm.utils import CallbackIOWrapper
+from easycv.framework.errors import (FileNotFoundError, IOError, RuntimeError,
+ ValueError)
from .base import IOLocal
from .utils import (OSS_PREFIX, create_namedtuple, get_oss_config, is_oss_path,
mute_stderr, oss_progress)
@@ -198,7 +200,7 @@ class IO(IOLocal):
time.sleep(3)
if data is None:
- raise ValueError('Read file error: %s!' % full_path)
+ raise IOError('Read file error: %s!' % full_path)
if mode == 'rb':
return NullContextWrapper(BytesIO(data))
diff --git a/easycv/file/image.py b/easycv/file/image.py
index 2b5420b2..a6253427 100644
--- a/easycv/file/image.py
+++ b/easycv/file/image.py
@@ -7,6 +7,7 @@ import numpy as np
from PIL import Image
from easycv.file import io
+from easycv.framework.errors import IOError
from easycv.utils.constant import MAX_READ_IMAGE_TRY_TIMES
from .utils import is_oss_path
@@ -43,6 +44,6 @@ def load_image(img_path, mode='BGR', max_try_times=MAX_READ_IMAGE_TRY_TIMES):
try_cnt += 1
if img is None:
- raise ValueError('Read Image Error: ' + img_path)
+ raise IOError('Read Image Error: ' + img_path)
return img
diff --git a/easycv/file/utils.py b/easycv/file/utils.py
index dcf13f4c..49920611 100644
--- a/easycv/file/utils.py
+++ b/easycv/file/utils.py
@@ -10,6 +10,8 @@ from io import StringIO
from tqdm import tqdm
+from easycv.framework.errors import ValueError
+
OSS_PREFIX = 'oss://'
URL_PREFIX = 'https://'
diff --git a/easycv/framework/__init__.py b/easycv/framework/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/easycv/framework/errors.py b/easycv/framework/errors.py
new file mode 100644
index 00000000..4fa8e8b7
--- /dev/null
+++ b/easycv/framework/errors.py
@@ -0,0 +1,128 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+# total 64 bit
+# 63~64 (issue category): 01 (user), ...
+# 60~62 (error severity): 001 (ERROR), 010 (WARNING), 011 (INFO), 100 (DEBUG), ...
+# 54~59 (product): 00000011 (PAI)
+# 49~53 (sub product): 00000 (none)
+# 41~48 (framework): 00000001 (tensorflow), 00000010 (pytorch)
+# 1~40 (error code)
+OK = 0x5818008000000000
+RUNTIME = 0x4818008000000001
+UNIMPLEMENTED = 0x4818008000000002
+INVALID_ARGUMENT = 0x4818008000000003
+INVALID_VALUE = 0x4818008000000004
+INVALID_KEY = 0x4818008000000005
+INVALID_TYPE = 0x4818008000000006
+MODULE_NOT_FOUND = 0x4818008000000007
+FILE_NOT_FOUND = 0x4818008000000008
+IO_FAILED = 0x4818008000000009
+PERMISSION_DENIED = 0x481800800000000a
+TIMEOUT = 0x481800800000000b
+
+
+class BaseError(Exception):
+ """The base error class for exceptions.
+ """
+ code = None
+
+ def __init__(self, message='', details=None, op=None):
+        """Creates a new error indicating that a particular op failed.
+
+        Args:
+            message: The message string describing the failure.
+            details: The help message for handling the error.
+            op: The operation that failed, if known; otherwise None. This
+                field is optional and defaults to `None`.
+ """
+ super(BaseError, self).__init__()
+ self._op = op
+ self._message = message
+ self._details = details
+
+ @property
+ def message(self):
+ """The error message that describes the error."""
+ return self._message
+
+ @property
+ def details(self):
+        """The help message for handling the error."""
+ return self._details
+
+ @property
+ def op(self):
+ """The operation that failed, if known.
+ Returns:
+ The `Operation` that failed, or None.
+ """
+ return self._op
+
+ @property
+ def error_code(self):
+ """The integer error code that describes the error."""
+ return hex(self.code)
+
+ def __str__(self):
+ print_str = 'ErrorCode: ' + self.error_code
+ if self.op is not None:
+ print_str += '\n' + 'Operation: ' + str(self.op)
+ print_str += '\n' + 'Message: ' + self.message
+ if self.details is not None:
+ print_str += '\n' + 'Details: ' + self.details
+ return print_str
+
+
+class NotImplementedError(BaseError):
+ """Raised when an operation has not been implemented."""
+ code = UNIMPLEMENTED
+
+
+class RuntimeError(BaseError):
+ """Raised when the system experiences an internal error."""
+ code = RUNTIME
+
+
+class PermissionDeniedError(BaseError):
+ """Raised when the caller does not have permission to run an operation."""
+ code = PERMISSION_DENIED
+
+
+class FileNotFoundError(BaseError):
+ """Raised when a requested entity was not found."""
+ code = FILE_NOT_FOUND
+
+
+class ModuleNotFoundError(BaseError):
+ """Raised when a module could not be located."""
+ code = MODULE_NOT_FOUND
+
+
+class InvalidArgumentError(BaseError):
+ """Raised when an operation receives an invalid argument."""
+ code = INVALID_ARGUMENT
+
+
+class TimeoutError(BaseError):
+ """Raised when an operation timed out."""
+ code = TIMEOUT
+
+
+class IOError(BaseError):
+ """Raised when an operation returns a system-related error, including I/O failures."""
+ code = IO_FAILED
+
+
+class ValueError(BaseError):
+ """Raised when an operation receives an invalid value."""
+ code = INVALID_VALUE
+
+
+class KeyError(BaseError):
+ """Raised when a mapping (dictionary) key is not found in the set of existing keys."""
+ code = INVALID_KEY
+
+
+class TypeError(BaseError):
+ """Raised when an operation or function is applied to an object of inappropriate type."""
+ code = INVALID_TYPE
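These classes intentionally shadow the built-in exception names, which is why the rest of this patch imports them explicitly (`from easycv.framework.errors import ValueError`, and so on) instead of relying on the builtins. Each error carries a 64-bit code whose high bits encode category, severity, product and framework, with the low 40 bits identifying the specific error. A minimal usage sketch (the alias is only to avoid shadowing the built-in inside this snippet):

```python
from easycv.framework.errors import BaseError
from easycv.framework.errors import ValueError as EasyCVValueError

try:
    raise EasyCVValueError(
        'root must be str or list(str)', details='check the data_source config')
except BaseError as e:               # all easycv errors share this base class
    print(e.error_code)              # '0x4818008000000004', i.e. INVALID_VALUE
    print(e)                         # ErrorCode / Message / Details via __str__
```

Since every class derives from `BaseError`, callers can catch the whole family with a single `except BaseError` clause, as above.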
diff --git a/easycv/hooks/eval_hook.py b/easycv/hooks/eval_hook.py
index e221e160..a4065617 100644
--- a/easycv/hooks/eval_hook.py
+++ b/easycv/hooks/eval_hook.py
@@ -7,6 +7,7 @@ from mmcv.runner import Hook
from torch.utils.data import DataLoader
from easycv.datasets.loader.loader_wrapper import TorchaccLoaderWrapper
+from easycv.framework.errors import TypeError
from easycv.hooks.tensorboard import TensorboardLoggerHookV2
from easycv.hooks.wandb import WandbLoggerHookV2
diff --git a/easycv/hooks/extractor.py b/easycv/hooks/extractor.py
index 6e6acafc..e81be9ae 100644
--- a/easycv/hooks/extractor.py
+++ b/easycv/hooks/extractor.py
@@ -2,6 +2,7 @@
import torch.nn as nn
from torch.utils.data import Dataset
+from easycv.framework.errors import TypeError
from easycv.utils.collect import dist_forward_collect, nondist_forward_collect
diff --git a/easycv/hooks/optimizer_hook.py b/easycv/hooks/optimizer_hook.py
index 44a6d49b..e31be411 100644
--- a/easycv/hooks/optimizer_hook.py
+++ b/easycv/hooks/optimizer_hook.py
@@ -6,6 +6,7 @@ import torch
from mmcv.parallel import is_module_wrapper
from mmcv.runner import OptimizerHook as _OptimizerHook
+from easycv.framework.errors import TypeError
from easycv.utils.dist_utils import get_dist_info
from easycv.utils.torchacc_util import is_torchacc_enabled
@@ -134,7 +135,7 @@ class AMPFP16OptimizerHook(OptimizerHook):
elif isinstance(loss_scale, dict):
self.scaler = amp.GradScaler(**loss_scale)
else:
- raise ValueError(
+ raise TypeError(
'`loss_scale` type must be in [float, dict], but got {loss_scale}'
)
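For reference, the two `loss_scale` forms this message names (the config values below are hypothetical; the dict form is unpacked into `torch.cuda.amp.GradScaler`):

    optimizer_config = dict(loss_scale=512.0)                   # plain float scale
    optimizer_config = dict(loss_scale=dict(init_scale=512.0))  # kwargs forwarded to amp.GradScaler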
diff --git a/easycv/models/backbones/hrnet.py b/easycv/models/backbones/hrnet.py
index 90730d02..09cb2198 100644
--- a/easycv/models/backbones/hrnet.py
+++ b/easycv/models/backbones/hrnet.py
@@ -7,6 +7,7 @@ from mmcv.cnn import (build_conv_layer, build_norm_layer, constant_init,
normal_init)
from torch.nn.modules.batchnorm import _BatchNorm
+from easycv.framework.errors import NotImplementedError, TypeError, ValueError
from easycv.models.registry import BACKBONES
from ..modelzoo import hrnet as model_urls
from .resnet import BasicBlock
diff --git a/easycv/models/backbones/lighthrnet.py b/easycv/models/backbones/lighthrnet.py
index 503f9d24..13846e0e 100644
--- a/easycv/models/backbones/lighthrnet.py
+++ b/easycv/models/backbones/lighthrnet.py
@@ -11,6 +11,7 @@ from mmcv.cnn import (ConvModule, DepthwiseSeparableConvModule,
normal_init)
from torch.nn.modules.batchnorm import _BatchNorm
+from easycv.framework.errors import ValueError
from easycv.models.registry import BACKBONES
diff --git a/easycv/models/backbones/mit.py b/easycv/models/backbones/mit.py
index a9cf2c01..c957b737 100644
--- a/easycv/models/backbones/mit.py
+++ b/easycv/models/backbones/mit.py
@@ -13,6 +13,7 @@ from mmcv.cnn.utils.weight_init import (constant_init, normal_init,
trunc_normal_init)
from mmcv.runner import BaseModule, ModuleList, Sequential
+from easycv.framework.errors import TypeError
from easycv.models.registry import BACKBONES
from easycv.models.segmentation.utils import (PatchEmbed, nchw_to_nlc,
nlc_to_nchw)
diff --git a/easycv/models/backbones/mobilenetv2.py b/easycv/models/backbones/mobilenetv2.py
index 860b26d9..283cd55a 100644
--- a/easycv/models/backbones/mobilenetv2.py
+++ b/easycv/models/backbones/mobilenetv2.py
@@ -5,6 +5,7 @@ r""" This model is taken from the official PyTorch model zoo.
from torch import nn
+from easycv.framework.errors import ValueError
from ..modelzoo import mobilenetv2 as model_urls
from ..registry import BACKBONES
diff --git a/easycv/models/backbones/pytorch_image_models_wrapper.py b/easycv/models/backbones/pytorch_image_models_wrapper.py
index 176d286f..1072056d 100644
--- a/easycv/models/backbones/pytorch_image_models_wrapper.py
+++ b/easycv/models/backbones/pytorch_image_models_wrapper.py
@@ -7,6 +7,7 @@ import torch.nn as nn
from timm.models.helpers import load_pretrained
from timm.models.hub import download_cached_file
+from easycv.framework.errors import ValueError
from easycv.utils.logger import get_root_logger, print_log
from ..modelzoo import timm_models as model_urls
from ..registry import BACKBONES
diff --git a/easycv/models/backbones/resnest.py b/easycv/models/backbones/resnest.py
index 13ef9987..6bd5a08e 100644
--- a/easycv/models/backbones/resnest.py
+++ b/easycv/models/backbones/resnest.py
@@ -14,6 +14,7 @@ import torch.nn.functional as F
from torch.nn import Conv2d, Module, ReLU
from torch.nn.modules.utils import _pair
+from easycv.framework.errors import KeyError, NotImplementedError, RuntimeError
from ..registry import BACKBONES
diff --git a/easycv/models/backbones/resnet.py b/easycv/models/backbones/resnet.py
index 6e083f86..ca5c959f 100644
--- a/easycv/models/backbones/resnet.py
+++ b/easycv/models/backbones/resnet.py
@@ -4,6 +4,7 @@ import torch.utils.checkpoint as cp
from mmcv.cnn import constant_init, kaiming_init
from torch.nn.modules.batchnorm import _BatchNorm
+from easycv.framework.errors import KeyError
from ..modelzoo import resnet as model_urls
from ..registry import BACKBONES
from ..utils import FReLU, build_conv_layer, build_norm_layer
diff --git a/easycv/models/backbones/resnet_jit.py b/easycv/models/backbones/resnet_jit.py
index 0d55e59a..4e241f86 100644
--- a/easycv/models/backbones/resnet_jit.py
+++ b/easycv/models/backbones/resnet_jit.py
@@ -6,6 +6,7 @@ import torch.nn as nn
from mmcv.cnn import constant_init, kaiming_init
from torch.nn.modules.batchnorm import _BatchNorm
+from easycv.framework.errors import KeyError
from ..registry import BACKBONES
from ..utils import build_conv_layer, build_norm_layer
diff --git a/easycv/models/backbones/shuffle_transformer.py b/easycv/models/backbones/shuffle_transformer.py
index 965df17e..c53c103d 100644
--- a/easycv/models/backbones/shuffle_transformer.py
+++ b/easycv/models/backbones/shuffle_transformer.py
@@ -7,6 +7,7 @@ from einops import rearrange
from timm.models.layers import DropPath, trunc_normal_
from torch import nn
+from easycv.framework.errors import NotImplementedError
from ..registry import BACKBONES
diff --git a/easycv/models/backbones/xcit_transformer.py b/easycv/models/backbones/xcit_transformer.py
index 0ee9cf87..18722f1e 100644
--- a/easycv/models/backbones/xcit_transformer.py
+++ b/easycv/models/backbones/xcit_transformer.py
@@ -19,6 +19,7 @@ import torch.nn as nn
from timm.models.layers import DropPath, to_2tuple, trunc_normal_
from timm.models.vision_transformer import Mlp, _cfg
+from easycv.framework.errors import ValueError
from ..registry import BACKBONES
@@ -109,7 +110,7 @@ class ConvPatchEmbed(nn.Module):
conv3x3(embed_dim // 2, embed_dim, 2),
)
else:
- raise (
+ raise ValueError(
'For convolutional projection, patch size has to be in [8, 16]'
)
diff --git a/easycv/models/base.py b/easycv/models/base.py
index 0385baf7..1f4c3278 100644
--- a/easycv/models/base.py
+++ b/easycv/models/base.py
@@ -8,6 +8,8 @@ import torch.distributed as dist
import torch.nn as nn
from torch import Tensor
+from easycv.framework.errors import NotImplementedError, TypeError
+
class BaseModel(nn.Module, metaclass=ABCMeta):
''' base class for model. '''
diff --git a/easycv/models/classification/classification.py b/easycv/models/classification/classification.py
index 34bde969..ccd30d50 100644
--- a/easycv/models/classification/classification.py
+++ b/easycv/models/classification/classification.py
@@ -7,6 +7,7 @@ import torch.nn as nn
from mmcv.runner import get_dist_info
from timm.data.mixup import Mixup
+from easycv.framework.errors import KeyError, NotImplementedError, ValueError
from easycv.utils.checkpoint import load_checkpoint
from easycv.utils.logger import get_root_logger, print_log
from easycv.utils.preprocess_function import (bninceptionPre, gaussianBlur,
@@ -300,4 +301,4 @@ class Classification(BaseModel):
rv['gt_labels'] = gt_labels.cpu()
return rv
else:
- raise Exception('No such mode: {}'.format(mode))
+ raise KeyError('No such mode: {}'.format(mode))
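Context for this and the similar `mode` fixes below: `forward` dispatches on a `mode` string, and an unrecognized value now raises the framework `KeyError` rather than a bare `Exception`. A hypothetical call pattern (argument names assumed for illustration only):

    # given a built Classification model `model` and an image batch `img`
    losses = model(img, mode='train')    # training losses (labels passed via kwargs)
    feats = model(img, mode='extract')   # feature extraction
    model(img, mode='predict')           # raises the easycv KeyError: 'No such mode: predict'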
diff --git a/easycv/models/detection/detectors/dab_detr/attention.py b/easycv/models/detection/detectors/dab_detr/attention.py
index ce4b6929..95b952cd 100644
--- a/easycv/models/detection/detectors/dab_detr/attention.py
+++ b/easycv/models/detection/detectors/dab_detr/attention.py
@@ -29,6 +29,8 @@ from torch.nn.init import constant_
from torch.nn.modules.linear import Linear
from torch.nn.modules.module import Module
+from easycv.framework.errors import RuntimeError
+
try:
from torch.overrides import has_torch_function, handle_torch_function
except:
diff --git a/easycv/models/detection/detectors/dab_detr/dab_detr_transformer.py b/easycv/models/detection/detectors/dab_detr/dab_detr_transformer.py
index 06345b11..1802f8e9 100644
--- a/easycv/models/detection/detectors/dab_detr/dab_detr_transformer.py
+++ b/easycv/models/detection/detectors/dab_detr/dab_detr_transformer.py
@@ -14,6 +14,7 @@ import torch
import torch.nn.functional as F
from torch import Tensor, nn
+from easycv.framework.errors import NotImplementedError, ValueError
from easycv.models.builder import NECKS
from easycv.models.detection.utils import inverse_sigmoid
from easycv.models.utils import (MLP, TransformerEncoder,
diff --git a/easycv/models/detection/detectors/detection.py b/easycv/models/detection/detectors/detection.py
index fe91fbf8..bcd8edf0 100644
--- a/easycv/models/detection/detectors/detection.py
+++ b/easycv/models/detection/detectors/detection.py
@@ -1,4 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
+from easycv.framework.errors import ValueError
from easycv.models.base import BaseModel
from easycv.models.builder import (MODELS, build_backbone, build_head,
build_neck)
diff --git a/easycv/models/detection/detectors/dino/deformable_transformer.py b/easycv/models/detection/detectors/dino/deformable_transformer.py
index 57d5f51d..447af568 100644
--- a/easycv/models/detection/detectors/dino/deformable_transformer.py
+++ b/easycv/models/detection/detectors/dino/deformable_transformer.py
@@ -15,6 +15,7 @@ from typing import Optional
import torch
from torch import Tensor, nn
+from easycv.framework.errors import NotImplementedError
from easycv.models.builder import NECKS
from easycv.models.detection.utils import (gen_encoder_output_proposals,
gen_sineembed_for_position,
diff --git a/easycv/models/detection/detectors/dino/dino_head.py b/easycv/models/detection/detectors/dino/dino_head.py
index bd581418..19ac173c 100644
--- a/easycv/models/detection/detectors/dino/dino_head.py
+++ b/easycv/models/detection/detectors/dino/dino_head.py
@@ -7,6 +7,7 @@ import torch
import torch.nn as nn
import torch.nn.functional as F
+from easycv.framework.errors import NotImplementedError
from easycv.models.builder import HEADS, build_neck
from easycv.models.detection.utils import (DetrPostProcess, box_xyxy_to_cxcywh,
inverse_sigmoid)
diff --git a/easycv/models/detection/detectors/yolox/asff.py b/easycv/models/detection/detectors/yolox/asff.py
index d4c62c3c..7af1ae84 100644
--- a/easycv/models/detection/detectors/yolox/asff.py
+++ b/easycv/models/detection/detectors/yolox/asff.py
@@ -3,6 +3,7 @@ import torch
import torch.nn as nn
import torch.nn.functional as F
+from easycv.framework.errors import ValueError
from easycv.models.backbones.network_blocks import BaseConv
diff --git a/easycv/models/detection/detectors/yolox/yolo_head_template.py b/easycv/models/detection/detectors/yolox/yolo_head_template.py
index 63923abf..a8e4fb03 100644
--- a/easycv/models/detection/detectors/yolox/yolo_head_template.py
+++ b/easycv/models/detection/detectors/yolox/yolo_head_template.py
@@ -8,6 +8,7 @@ import torch
import torch.nn as nn
import torch.nn.functional as F
+from easycv.framework.errors import KeyError, RuntimeError
from easycv.models.backbones.network_blocks import BaseConv, DWConv
from easycv.models.backbones.repvgg_yolox_backbone import RepVGGBlock
from easycv.models.detection.utils import bboxes_iou
diff --git a/easycv/models/detection/necks/fpn.py b/easycv/models/detection/necks/fpn.py
index 8018903c..6f71eda0 100644
--- a/easycv/models/detection/necks/fpn.py
+++ b/easycv/models/detection/necks/fpn.py
@@ -3,6 +3,7 @@ import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import ConvModule
+from easycv.framework.errors import NotImplementedError
from easycv.models.registry import NECKS
diff --git a/easycv/models/detection/necks/sfp.py b/easycv/models/detection/necks/sfp.py
index b588f643..62e581ea 100644
--- a/easycv/models/detection/necks/sfp.py
+++ b/easycv/models/detection/necks/sfp.py
@@ -3,6 +3,7 @@ import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import ConvModule
+from easycv.framework.errors import NotImplementedError
from easycv.models.builder import NECKS
diff --git a/easycv/models/detection/utils/misc.py b/easycv/models/detection/utils/misc.py
index a9605a3b..9a2de7a9 100644
--- a/easycv/models/detection/utils/misc.py
+++ b/easycv/models/detection/utils/misc.py
@@ -9,6 +9,8 @@ from packaging import version
from torch import Tensor
from torch.autograd import Function
+from easycv.framework.errors import NotImplementedError
+
if version.parse(torchvision.__version__) < version.parse('0.7'):
from torchvision.ops import _new_empty_tensor
from torchvision.ops.misc import _output_size
diff --git a/easycv/models/loss/cross_entropy_loss.py b/easycv/models/loss/cross_entropy_loss.py
index 0f9d5074..ad8661cb 100644
--- a/easycv/models/loss/cross_entropy_loss.py
+++ b/easycv/models/loss/cross_entropy_loss.py
@@ -7,6 +7,7 @@ import torch
import torch.nn as nn
import torch.nn.functional as F
+from easycv.framework.errors import ValueError
from easycv.models.builder import LOSSES
from easycv.models.loss.utils import weight_reduce_loss
diff --git a/easycv/models/loss/focal_loss.py b/easycv/models/loss/focal_loss.py
index 0cec5ddb..f4ea5a47 100644
--- a/easycv/models/loss/focal_loss.py
+++ b/easycv/models/loss/focal_loss.py
@@ -4,6 +4,7 @@ import torch.nn as nn
import torch.nn.functional as F
from mmcv.ops import sigmoid_focal_loss as _sigmoid_focal_loss
+from easycv.framework.errors import NotImplementedError
from easycv.models.builder import LOSSES
from easycv.models.loss.utils import weight_reduce_loss
diff --git a/easycv/models/loss/iou_loss.py b/easycv/models/loss/iou_loss.py
index 8a4af4bb..72611f02 100644
--- a/easycv/models/loss/iou_loss.py
+++ b/easycv/models/loss/iou_loss.py
@@ -7,6 +7,7 @@ import mmcv
import torch
import torch.nn as nn
+from easycv.framework.errors import NotImplementedError
from easycv.models.detection.utils import bbox_overlaps
from easycv.models.loss.utils import weighted_loss
from ..registry import LOSSES
diff --git a/easycv/models/loss/utils.py b/easycv/models/loss/utils.py
index b08e7cf3..0164b104 100644
--- a/easycv/models/loss/utils.py
+++ b/easycv/models/loss/utils.py
@@ -4,6 +4,8 @@ import functools
import torch
import torch.nn.functional as F
+from easycv.framework.errors import ValueError
+
def reduce_loss(loss, reduction):
"""Reduce loss as specified.
diff --git a/easycv/models/pose/heads/topdown_heatmap_base_head.py b/easycv/models/pose/heads/topdown_heatmap_base_head.py
index adc3dfa6..afcfc8f3 100644
--- a/easycv/models/pose/heads/topdown_heatmap_base_head.py
+++ b/easycv/models/pose/heads/topdown_heatmap_base_head.py
@@ -7,6 +7,7 @@ import numpy as np
import torch.nn as nn
from easycv.core.evaluation.top_down_eval import keypoints_from_heatmaps
+from easycv.framework.errors import ValueError
class TopdownHeatmapBaseHead(nn.Module):
diff --git a/easycv/models/pose/heads/topdown_heatmap_simple_head.py b/easycv/models/pose/heads/topdown_heatmap_simple_head.py
index ba3c746b..8811d7eb 100644
--- a/easycv/models/pose/heads/topdown_heatmap_simple_head.py
+++ b/easycv/models/pose/heads/topdown_heatmap_simple_head.py
@@ -7,6 +7,7 @@ from mmcv.cnn import (build_conv_layer, build_norm_layer, build_upsample_layer,
from easycv.core.evaluation import pose_pck_accuracy
from easycv.core.post_processing import flip_back
+from easycv.framework.errors import TypeError, ValueError
from easycv.models.builder import HEADS, build_loss
from easycv.models.utils.ops import resize_tensor as resize
from .topdown_heatmap_base_head import TopdownHeatmapBaseHead
diff --git a/easycv/models/segmentation/encoder_decoder.py b/easycv/models/segmentation/encoder_decoder.py
index 6b96c98f..63577528 100644
--- a/easycv/models/segmentation/encoder_decoder.py
+++ b/easycv/models/segmentation/encoder_decoder.py
@@ -3,6 +3,7 @@ import torch
import torch.nn as nn
import torch.nn.functional as F
+from easycv.framework.errors import TypeError, ValueError
from easycv.models import builder
from easycv.models.base import BaseModel
from easycv.models.builder import MODELS
diff --git a/easycv/models/segmentation/heads/base.py b/easycv/models/segmentation/heads/base.py
index f1508a25..3aaf85ff 100644
--- a/easycv/models/segmentation/heads/base.py
+++ b/easycv/models/segmentation/heads/base.py
@@ -7,6 +7,7 @@ import torch.nn as nn
from mmcv.cnn.utils import initialize
from easycv.core.evaluation.metrics import accuracy
+from easycv.framework.errors import TypeError
from easycv.models.builder import build_loss
from easycv.models.utils.ops import resize_tensor
from easycv.utils.logger import print_log
diff --git a/easycv/models/segmentation/heads/transformer_decoder.py b/easycv/models/segmentation/heads/transformer_decoder.py
index 3a42072d..88ef6303 100644
--- a/easycv/models/segmentation/heads/transformer_decoder.py
+++ b/easycv/models/segmentation/heads/transformer_decoder.py
@@ -5,6 +5,8 @@ import torch
from torch import Tensor, nn
from torch.nn import functional as F
+from easycv.framework.errors import RuntimeError, ValueError
+
class PositionEmbeddingSine(nn.Module):
"""
diff --git a/easycv/models/segmentation/utils/criterion.py b/easycv/models/segmentation/utils/criterion.py
index 490b2cb7..29345d12 100644
--- a/easycv/models/segmentation/utils/criterion.py
+++ b/easycv/models/segmentation/utils/criterion.py
@@ -8,6 +8,7 @@ import torchvision
from mmcv.runner import get_dist_info
from torch import Tensor, nn
+from easycv.framework.errors import ValueError
from .point_rend import (get_uncertain_point_coords_with_randomness,
point_sample)
diff --git a/easycv/models/selfsup/byol.py b/easycv/models/selfsup/byol.py
index 44d42d10..9bdce95d 100644
--- a/easycv/models/selfsup/byol.py
+++ b/easycv/models/selfsup/byol.py
@@ -2,6 +2,7 @@
import torch
import torch.nn as nn
+from easycv.framework.errors import KeyError
from easycv.utils.checkpoint import load_checkpoint
from easycv.utils.logger import get_root_logger
from .. import builder
@@ -97,4 +98,4 @@ class BYOL(BaseModel):
elif mode == 'extract':
return self.backbone(img)
else:
- raise Exception('No such mode: {}'.format(mode))
+ raise KeyError('No such mode: {}'.format(mode))
diff --git a/easycv/models/selfsup/dino.py b/easycv/models/selfsup/dino.py
index f9978974..1e21cad4 100644
--- a/easycv/models/selfsup/dino.py
+++ b/easycv/models/selfsup/dino.py
@@ -9,6 +9,7 @@ import torch.nn as nn
import torch.nn.functional as F
from mmcv.runner import get_dist_info
+from easycv.framework.errors import KeyError, NotImplementedError, ValueError
from easycv.utils.preprocess_function import (gaussianBlurDynamic,
randomGrayScale, solarize)
from .. import builder
@@ -416,4 +417,4 @@ class DINO(BaseModel):
# rv['gt_labels'] = gt_label.cpu()
# return rv
else:
- raise Exception('No such mode: {}'.format(mode))
+ raise KeyError('No such mode: {}'.format(mode))
diff --git a/easycv/models/selfsup/mae.py b/easycv/models/selfsup/mae.py
index 9efb686c..de9d062a 100644
--- a/easycv/models/selfsup/mae.py
+++ b/easycv/models/selfsup/mae.py
@@ -1,5 +1,6 @@
import torch
+from easycv.framework.errors import KeyError
from .. import builder
from ..base import BaseModel
from ..registry import MODELS
@@ -84,4 +85,4 @@ class MAE(BaseModel):
elif mode == 'test':
return self.forward_test(img, **kwargs)
else:
- raise Exception('No such mode: {}'.format(mode))
+ raise KeyError('No such mode: {}'.format(mode))
diff --git a/easycv/models/selfsup/moby.py b/easycv/models/selfsup/moby.py
index 8ed0e9e5..94ebb5c5 100644
--- a/easycv/models/selfsup/moby.py
+++ b/easycv/models/selfsup/moby.py
@@ -3,6 +3,7 @@ import torch
import torch.nn as nn
import torch.nn.functional as F
+from easycv.framework.errors import KeyError, ValueError
from easycv.utils.checkpoint import load_checkpoint
from easycv.utils.logger import get_root_logger
from easycv.utils.preprocess_function import gaussianBlur, randomGrayScale
@@ -269,12 +270,14 @@ class MoBY(BaseModel):
if name in rd.keys():
rv[name] = rd[name]
else:
- raise 'Extract %s is not support in classification models' % name
+ raise ValueError(
+ 'Extract %s is not supported in classification models' %
+ name)
if gt_label is not None:
rv['gt_labels'] = gt_label.cpu()
return rv
else:
- raise Exception('No such mode: {}'.format(mode))
+ raise KeyError('No such mode: {}'.format(mode))
# utils
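A side note on why this family of replacements matters (the same pattern recurs in moco.py, swav.py and config_tools.py below): Python 3 refuses to raise a bare string, so the old form died with a TypeError before its message was ever shown. Illustrative snippet, not part of the patch:

    try:
        raise 'Extract %s is not supported in classification models' % 'neck'
    except TypeError as e:
        print(e)  # exceptions must derive from BaseException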
diff --git a/easycv/models/selfsup/moco.py b/easycv/models/selfsup/moco.py
index 5094f61d..e566f726 100644
--- a/easycv/models/selfsup/moco.py
+++ b/easycv/models/selfsup/moco.py
@@ -2,6 +2,7 @@
import torch
import torch.nn as nn
+from easycv.framework.errors import KeyError, ValueError
from easycv.utils.checkpoint import load_checkpoint
from easycv.utils.logger import get_root_logger
from easycv.utils.preprocess_function import gaussianBlur, randomGrayScale
@@ -232,12 +233,14 @@ class MOCO(BaseModel):
if name in rd.keys():
rv[name] = rd[name]
else:
- raise 'Extract %s is not support in classification models' % name
+ raise ValueError(
+ 'Extract %s is not supported in classification models' %
+ name)
if gt_label is not None:
rv['gt_labels'] = gt_label.cpu()
return rv
else:
- raise Exception('No such mode: {}'.format(mode))
+ raise KeyError('No such mode: {}'.format(mode))
# utils
diff --git a/easycv/models/selfsup/simclr.py b/easycv/models/selfsup/simclr.py
index 3b26eaf4..28e1b324 100644
--- a/easycv/models/selfsup/simclr.py
+++ b/easycv/models/selfsup/simclr.py
@@ -1,6 +1,7 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import torch
+from easycv.framework.errors import KeyError
from easycv.utils.checkpoint import load_checkpoint
from easycv.utils.logger import get_root_logger
from easycv.utils.preprocess_function import gaussianBlur, randomGrayScale
@@ -97,4 +98,4 @@ class SimCLR(BaseModel):
elif mode == 'extract':
return self.forward_backbone(img)
else:
- raise Exception('No such mode: {}'.format(mode))
+ raise KeyError('No such mode: {}'.format(mode))
diff --git a/easycv/models/selfsup/swav.py b/easycv/models/selfsup/swav.py
index a7e8af4f..1393fc29 100644
--- a/easycv/models/selfsup/swav.py
+++ b/easycv/models/selfsup/swav.py
@@ -5,6 +5,7 @@ import torch.distributed as dist
import torch.nn as nn
from mmcv.runner import get_dist_info
+from easycv.framework.errors import KeyError, ValueError
from easycv.utils.checkpoint import load_checkpoint
from easycv.utils.logger import get_root_logger
from easycv.utils.preprocess_function import gaussianBlur, randomGrayScale
@@ -193,12 +194,14 @@ class SWAV(BaseModel):
if name in rd.keys():
rv[name] = rd[name]
else:
- raise 'Extract %s is not support in classification models' % name
+ raise ValueError(
+ 'Extract %s is not supported in classification models' %
+ name)
if gt_label is not None:
rv['gt_labels'] = gt_label.cpu()
return rv
else:
- raise Exception('No such mode: {}'.format(mode))
+ raise KeyError('No such mode: {}'.format(mode))
class MultiPrototypes(nn.Module):
diff --git a/easycv/models/utils/conv_module.py b/easycv/models/utils/conv_module.py
index 26364a43..bfebf816 100644
--- a/easycv/models/utils/conv_module.py
+++ b/easycv/models/utils/conv_module.py
@@ -4,6 +4,7 @@ import warnings
import torch.nn as nn
from mmcv.cnn import constant_init, kaiming_init
+from easycv.framework.errors import KeyError
from .activation import build_activation_layer
from .conv_ws import ConvWS2d
from .norm import build_norm_layer
diff --git a/easycv/models/utils/norm.py b/easycv/models/utils/norm.py
index 85191b55..f5ca46e7 100644
--- a/easycv/models/utils/norm.py
+++ b/easycv/models/utils/norm.py
@@ -2,6 +2,8 @@
import torch
import torch.nn as nn
+from easycv.framework.errors import KeyError, NotImplementedError
+
class SyncIBN(nn.Module):
r"""Instance-Batch Normalization layer from
diff --git a/easycv/models/utils/transformer.py b/easycv/models/utils/transformer.py
index e76fbb44..3cb19931 100644
--- a/easycv/models/utils/transformer.py
+++ b/easycv/models/utils/transformer.py
@@ -6,6 +6,8 @@ import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
+from easycv.framework.errors import RuntimeError
+
class MLP(nn.Module):
""" Very simple multi-layer perceptron (also called FFN)"""
diff --git a/easycv/predictors/base.py b/easycv/predictors/base.py
index 49f3a728..d65b1bb6 100644
--- a/easycv/predictors/base.py
+++ b/easycv/predictors/base.py
@@ -10,6 +10,7 @@ from torchvision.transforms import Compose
from easycv.datasets.registry import PIPELINES
from easycv.file import io
+from easycv.framework.errors import ValueError
from easycv.models.builder import build_model
from easycv.utils.checkpoint import load_checkpoint
from easycv.utils.config_tools import mmcv_config_fromfile
diff --git a/easycv/predictors/classifier.py b/easycv/predictors/classifier.py
index be3a9967..e29d6736 100644
--- a/easycv/predictors/classifier.py
+++ b/easycv/predictors/classifier.py
@@ -4,6 +4,7 @@ import math
import numpy as np
import torch
+from easycv.framework.errors import ValueError
from .base import Predictor
from .builder import PREDICTORS
@@ -30,7 +31,7 @@ class TorchClassifier(PredictorInterface):
"""
self.predictor = Predictor(model_path)
if 'class_list' not in self.predictor.cfg and label_map_path is None:
- raise Exception(
+ raise ValueError(
"label_map_path need to be set, when ckpt doesn't contain class_list"
)
diff --git a/easycv/predictors/detector.py b/easycv/predictors/detector.py
index 017d671e..7637ca67 100644
--- a/easycv/predictors/detector.py
+++ b/easycv/predictors/detector.py
@@ -16,6 +16,7 @@ from easycv.datasets.registry import PIPELINES
from easycv.datasets.utils import replace_ImageToTensor
from easycv.file import io
from easycv.file.utils import is_url_path, url_path_exists
+from easycv.framework.errors import TypeError
from easycv.models import build_model
from easycv.models.detection.utils import postprocess
from easycv.utils.checkpoint import load_checkpoint
diff --git a/easycv/predictors/feature_extractor.py b/easycv/predictors/feature_extractor.py
index 79110bb9..fc0802af 100644
--- a/easycv/predictors/feature_extractor.py
+++ b/easycv/predictors/feature_extractor.py
@@ -6,6 +6,7 @@ import numpy as np
import torch
from PIL import Image
+from easycv.framework.errors import ValueError
from .base import Predictor
from .builder import PREDICTORS
@@ -522,7 +523,7 @@ class TorchFaceAttrExtractor(PredictorInterface):
neck_output.device)
neck_output = (distribute * neck_output).sum(dim=1)
else:
- raise Exception(
+ raise ValueError(
'TorchFaceAttrExtractor for neck %d only support attr_method softmax/distributed sum'
% (neck_idx))
neck_output = torch.argmax(neck_output, dim=1)
diff --git a/easycv/predictors/pose_predictor.py b/easycv/predictors/pose_predictor.py
index 34ca5475..b2408051 100644
--- a/easycv/predictors/pose_predictor.py
+++ b/easycv/predictors/pose_predictor.py
@@ -11,6 +11,7 @@ from torchvision.transforms import Compose
from easycv.datasets.pose.data_sources.top_down import DatasetInfo
from easycv.datasets.registry import PIPELINES
from easycv.file import io
+from easycv.framework.errors import ModuleNotFoundError, TypeError, ValueError
from easycv.models import build_model
from easycv.predictors.builder import PREDICTORS
from easycv.predictors.detector import TorchYoloXPredictor
diff --git a/easycv/runner/ev_runner.py b/easycv/runner/ev_runner.py
index d8b9cb14..7921808b 100644
--- a/easycv/runner/ev_runner.py
+++ b/easycv/runner/ev_runner.py
@@ -8,6 +8,7 @@ from mmcv.runner import EpochBasedRunner
from mmcv.runner.log_buffer import LogBuffer
from easycv.file import io
+from easycv.framework.errors import RuntimeError, TypeError
from easycv.utils.checkpoint import load_checkpoint, save_checkpoint
if LooseVersion(torch.__version__) >= LooseVersion('1.6.0'):
diff --git a/easycv/toolkit/blade/cv_blade_utils.py b/easycv/toolkit/blade/cv_blade_utils.py
index 0bfcb8f4..cd742161 100644
--- a/easycv/toolkit/blade/cv_blade_utils.py
+++ b/easycv/toolkit/blade/cv_blade_utils.py
@@ -17,6 +17,8 @@ import torch_blade.tensorrt
import torchvision
from torch_blade import optimize
+from easycv.framework.errors import RuntimeError
+
os.environ['DISC_ENABLE_STITCH'] = os.environ.get('DISC_ENABLE_STITCH', 'true')
os.environ['DISC_EXPERIMENTAL_SPECULATION_TLP_ENHANCE'] = os.environ.get(
'DISC_EXPERIMENTAL_SPECULATION_TLP_ENHANCE', 'true')
@@ -103,13 +105,13 @@ def opt_trt_config(
def cu_prof_start():
ret = _cudart.cudaProfilerStart()
if ret != 0:
- raise Exception('cudaProfilerStart() returned %d' % ret)
+ raise RuntimeError('cudaProfilerStart() returned %d' % ret)
def cu_prof_stop():
ret = _cudart.cudaProfilerStop()
if ret != 0:
- raise Exception('cudaProfilerStop() returned %d' % ret)
+ raise RuntimeError('cudaProfilerStop() returned %d' % ret)
@contextmanager
diff --git a/easycv/toolkit/prune/prune_utils.py b/easycv/toolkit/prune/prune_utils.py
index b9fb2aa2..48f05350 100644
--- a/easycv/toolkit/prune/prune_utils.py
+++ b/easycv/toolkit/prune/prune_utils.py
@@ -1,4 +1,6 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
+from easycv.framework.errors import ValueError
+
try:
from nni.algorithms.compression.pytorch.pruning import AGPPrunerV2
except ImportError:
@@ -83,7 +85,7 @@ def load_pruner(model,
optimizer=optimizer,
pruning_algorithm=pruning_algorithm)
else:
- raise Exception(
+ raise ValueError(
'pruning class {} is not supported'.format(pruning_class))
return pruner
diff --git a/easycv/toolkit/quantize/quantize_utils.py b/easycv/toolkit/quantize/quantize_utils.py
index c7ef8aa8..41ce1ac1 100644
--- a/easycv/toolkit/quantize/quantize_utils.py
+++ b/easycv/toolkit/quantize/quantize_utils.py
@@ -7,6 +7,7 @@ import numpy as np
import torch
from mmcv.parallel import scatter_kwargs
+from easycv.framework.errors import ValueError
from easycv.models.detection.detectors.yolox.yolo_head import YOLOXHead
from easycv.models.detection.utils import output_postprocess, postprocess
diff --git a/easycv/toolkit/torchacc/convert_ops.py b/easycv/toolkit/torchacc/convert_ops.py
index 2e3d69ee..c17f898c 100644
--- a/easycv/toolkit/torchacc/convert_ops.py
+++ b/easycv/toolkit/torchacc/convert_ops.py
@@ -10,6 +10,8 @@ import torchacc.torch_xla.core.xla_model as xm
from prettytable import PrettyTable
from torch.distributed import ReduceOp
+from easycv.framework.errors import ValueError
+
DEFAULT_TAG = 'EasyCV-default-barrier-tag'
OpSpec = namedtuple('OpSpec', ['module', 'name', 'value'])
diff --git a/easycv/utils/checkpoint.py b/easycv/utils/checkpoint.py
index 4bf0af60..c583d9a0 100644
--- a/easycv/utils/checkpoint.py
+++ b/easycv/utils/checkpoint.py
@@ -8,6 +8,7 @@ from mmcv.runner.checkpoint import get_state_dict, weights_to_cpu
from torch.optim import Optimizer
from easycv.file import io
+from easycv.framework.errors import TypeError
from easycv.utils.constant import CACHE_DIR
diff --git a/easycv/utils/collect.py b/easycv/utils/collect.py
index 887ac417..904a44c9 100644
--- a/easycv/utils/collect.py
+++ b/easycv/utils/collect.py
@@ -5,6 +5,7 @@ import mmcv
import numpy as np
import torch
+from easycv.framework.errors import ValueError
from .gather import gather_tensors_batch
diff --git a/easycv/utils/config_tools.py b/easycv/utils/config_tools.py
index adc846df..90386c89 100644
--- a/easycv/utils/config_tools.py
+++ b/easycv/utils/config_tools.py
@@ -7,6 +7,7 @@ from importlib import import_module
from mmcv import Config, import_modules_from_strings
+from easycv.framework.errors import IOError, KeyError, ValueError
from .user_config_params_utils import check_value_type
if platform.system() == 'Windows':
@@ -58,7 +59,7 @@ def check_base_cfg_path(base_cfg_name='configs/base.py', ori_filename=None):
if osp.exists(base_cfg_path_3):
return base_cfg_path_3
- raise '%s not Found' % base_cfg_name
+ raise ValueError('%s not Found' % base_cfg_name)
# Read config without __base__
@@ -69,7 +70,7 @@ def mmcv_file2dict_raw(ori_filename):
# read configs/config_templates/detection_oss.py
filename = check_base_cfg_path(ori_filename)
else:
- raise '%s and %s not Found' % (ori_filename, filename)
+ raise ValueError('%s and %s not Found' % (ori_filename, filename))
fileExtname = osp.splitext(filename)[1]
if fileExtname not in ['.py', '.json', '.yaml', '.yml']:
diff --git a/easycv/utils/json_utils.py b/easycv/utils/json_utils.py
index 05dacacb..536966d6 100644
--- a/easycv/utils/json_utils.py
+++ b/easycv/utils/json_utils.py
@@ -23,6 +23,8 @@ from json import encoder
import numpy as np
+from easycv.framework.errors import ValueError
+
# python 3.5 and newer version does not have json.encoder.FLOAT_REPR
needs_class_hack = sys.version_info >= (3, 5)
diff --git a/easycv/utils/logger.py b/easycv/utils/logger.py
index 0c785311..9183af1e 100644
--- a/easycv/utils/logger.py
+++ b/easycv/utils/logger.py
@@ -3,6 +3,8 @@ import logging
from mmcv.utils import get_logger
+from easycv.framework.errors import TypeError
+
def get_root_logger(log_file=None, log_level=logging.INFO):
"""Get the root logger.
diff --git a/easycv/utils/mmlab_utils.py b/easycv/utils/mmlab_utils.py
index 17899d08..e4e2df86 100644
--- a/easycv/utils/mmlab_utils.py
+++ b/easycv/utils/mmlab_utils.py
@@ -11,6 +11,7 @@ import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import ConvModule
+from easycv.framework.errors import TypeError, ValueError
from easycv.models.registry import BACKBONES, HEADS, MODELS, NECKS
from .test_util import run_in_subprocess
@@ -159,7 +160,7 @@ class MMAdapter:
elif inspect.isclass(module_name):
module_obj = module_name
else:
- raise ValueError(
+ raise TypeError(
'Only support type `str` and `class` object, but get type {}'.
format(type(module_name)))
return module_obj
diff --git a/easycv/utils/registry.py b/easycv/utils/registry.py
index b9e19b7d..19d9ab5d 100644
--- a/easycv/utils/registry.py
+++ b/easycv/utils/registry.py
@@ -4,6 +4,8 @@ from functools import partial
import mmcv
+from easycv.framework.errors import KeyError, TypeError
+
class Registry(object):
diff --git a/easycv/utils/test_util.py b/easycv/utils/test_util.py
index 169b12e8..ddd7245d 100644
--- a/easycv/utils/test_util.py
+++ b/easycv/utils/test_util.py
@@ -18,6 +18,7 @@ import numpy as np
import torch
from easycv.file import io
+from easycv.framework.errors import RuntimeError
TEST_DIR = '/tmp/ev_pytorch_test'
diff --git a/easycv/utils/user_config_params_utils.py b/easycv/utils/user_config_params_utils.py
index 45d2772d..558741c9 100644
--- a/easycv/utils/user_config_params_utils.py
+++ b/easycv/utils/user_config_params_utils.py
@@ -1,3 +1,5 @@
+from easycv.framework.errors import TypeError
+
VALID_TYPES = {tuple, list, str, int, float, bool, type(None)}
diff --git a/tests/core/evaluation/test_coco_evaluation.py b/tests/core/evaluation/test_coco_evaluation.py
index badf94a9..7ea3a706 100644
--- a/tests/core/evaluation/test_coco_evaluation.py
+++ b/tests/core/evaluation/test_coco_evaluation.py
@@ -21,6 +21,7 @@ import numpy as np
from easycv.core import standard_fields
from easycv.core.evaluation import coco_evaluation
+from easycv.framework.errors import ValueError
class CocoDetectionEvaluationTest(unittest.TestCase):
diff --git a/tests/core/optimizer/test_optimizers.py b/tests/core/optimizer/test_optimizers.py
index fa569020..3a1c538e 100644
--- a/tests/core/optimizer/test_optimizers.py
+++ b/tests/core/optimizer/test_optimizers.py
@@ -9,6 +9,8 @@ from torch.autograd import Variable
from torch.optim.lr_scheduler import ExponentialLR, ReduceLROnPlateau, StepLR
from torch.testing._internal.common_utils import TestCase
+from easycv.framework.errors import ValueError
+
@unittest.skipIf(
LooseVersion(torch.__version__) < LooseVersion('1.6.0'),
diff --git a/tests/datasets/detection/data_sources/test_det_voc_datasource.py b/tests/datasets/detection/data_sources/test_det_voc_datasource.py
index 8e016415..cb409c59 100644
--- a/tests/datasets/detection/data_sources/test_det_voc_datasource.py
+++ b/tests/datasets/detection/data_sources/test_det_voc_datasource.py
@@ -8,6 +8,7 @@ from tests.ut_config import DET_DATA_SMALL_VOC_LOCAL, VOC_CLASSES
from easycv.datasets.detection.data_sources.voc import DetSourceVOC
from easycv.file import io
+from easycv.framework.errors import ValueError
class DetSourceVOCTest(unittest.TestCase):
@@ -135,7 +136,7 @@ class DetSourceVOCTest(unittest.TestCase):
self.assertEqual(num_samples, 20)
self.assertEqual(data_source._retry_count, 2)
- self.assertEqual(exception.args[0], 'All samples failed to load!')
+ self.assertEqual(exception.message, 'All samples failed to load!')
if __name__ == '__main__':
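Note on the assertion change above: `BaseError.__init__` calls `super().__init__()` with no arguments, so `exception.args` stays empty and the text is only reachable through the `message` property. A quick illustrative check:

    from easycv.framework.errors import ValueError

    err = ValueError('All samples failed to load!')
    assert err.args == ()
    assert err.message == 'All samples failed to load!'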
diff --git a/tests/framework/__init__.py b/tests/framework/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/framework/test_errors.py b/tests/framework/test_errors.py
new file mode 100644
index 00000000..a01d3290
--- /dev/null
+++ b/tests/framework/test_errors.py
@@ -0,0 +1,52 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import unittest
+
+
+class ErrorsTest(unittest.TestCase):
+
+ def setUp(self):
+ print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
+
+ def test_errors(self):
+ from easycv.framework import errors
+
+ def dummy_op():
+ pass
+
+ with self.assertRaises(errors.ValueError) as cm:
+ raise errors.ValueError(
+ 'value error', details='provide correct value', op=dummy_op)
+ value_exception = cm.exception
+ self.assertEqual(value_exception.error_code, hex(errors.INVALID_VALUE))
+ self.assertEqual(value_exception.op, dummy_op)
+ self.assertEqual(value_exception.details, 'provide correct value')
+ self.assertEqual(value_exception.message, 'value error')
+
+ with self.assertRaises(errors.NotImplementedError) as cm:
+ raise errors.NotImplementedError()
+ value_exception = cm.exception
+ self.assertEqual(value_exception.error_code, hex(errors.UNIMPLEMENTED))
+ self.assertEqual(value_exception.op, None)
+ self.assertEqual(value_exception.details, None)
+ self.assertEqual(value_exception.message, '')
+
+ with self.assertRaises(errors.FileNotFoundError) as cm:
+ raise errors.FileNotFoundError
+ value_exception = cm.exception
+ self.assertEqual(value_exception.error_code,
+ hex(errors.FILE_NOT_FOUND))
+ self.assertEqual(value_exception.op, None)
+ self.assertEqual(value_exception.details, None)
+ self.assertEqual(value_exception.message, '')
+
+ with self.assertRaises(errors.TimeoutError) as cm:
+ raise errors.TimeoutError('time out')
+ value_exception = cm.exception
+ self.assertEqual(value_exception.error_code, hex(errors.TIMEOUT))
+ self.assertEqual(value_exception.op, None)
+ self.assertEqual(value_exception.details, None)
+ self.assertEqual(value_exception.message, 'time out')
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/utils/test_json_utils.py b/tests/utils/test_json_utils.py
index f3d60696..7c906ae6 100644
--- a/tests/utils/test_json_utils.py
+++ b/tests/utils/test_json_utils.py
@@ -21,6 +21,7 @@ import tempfile
import unittest
from easycv.file import io
+from easycv.framework.errors import ValueError
from easycv.utils import json_utils
diff --git a/tools/eval.py b/tools/eval.py
index 60be08df..66b69f44 100644
--- a/tools/eval.py
+++ b/tools/eval.py
@@ -32,6 +32,7 @@ from easycv.utils.config_tools import (CONFIG_TEMPLATE_ZOO,
from easycv.utils.mmlab_utils import dynamic_adapt_for_mmlab
from easycv.utils.setup_env import setup_multi_processes
+from easycv.framework.errors import ValueError, NotImplementedError
from easycv.utils.misc import reparameterize_models
@@ -251,8 +252,7 @@ def main():
eval_kwargs.update(args.options)
if args.inference_only:
- raise RuntimeError('not implemented')
- dataset.format_results(outputs, **eval_kwargs)
+ raise NotImplementedError('not implemented')
if args.eval:
for t in eval_pipe.evaluators:
if 'metric_type' in t:
From b376d84fe0cc7cef9cb5739fdef170f6137e4d4b Mon Sep 17 00:00:00 2001
From: Cathy0908 <30484308+Cathy0908@users.noreply.github.com>
Date: Mon, 19 Sep 2022 18:00:34 +0800
Subject: [PATCH 7/9] add train memory (#178)
* add train memory
---
docs/source/model_zoo_det.md | 45 ++++++++++++++++++------------------
docs/source/model_zoo_seg.md | 26 ++++++++++-----------
docs/source/model_zoo_ssl.md | 42 ++++++++++++++++-----------------
3 files changed, 56 insertions(+), 57 deletions(-)
diff --git a/docs/source/model_zoo_det.md b/docs/source/model_zoo_det.md
index 474496f0..ccabed31 100644
--- a/docs/source/model_zoo_det.md
+++ b/docs/source/model_zoo_det.md
@@ -6,38 +6,37 @@ Inference default use V100 16G.
Pretrained on COCO2017 dataset. (The result has been optimized with PAI-Blade, and only computes the model inference time. To learn about end2end inference time, you can refer to [export.md](./tutorials/export.md).)
-| Algorithm | Config | Params | SpeedV100<br/>fp16 b32 | mAPval<br/>0.5:0.95 | APval<br/>50 | Download |
-|-----------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------|--------|-----------------------------------------|-------------------------------------|------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| YOLOX-s | [yolox_s_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_s_8xb16_300e_coco.py) | 9M | 0.68ms | 40.0 | 58.9 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_s_bs16_lr002/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_s_bs16_lr002/log.txt) |
-| PAI-YOLOXs | [yoloxs_pai_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/pai_yoloxs_8xb16_300e_coco.py) | 16M | 0.71ms | 41.4 | 60.0 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox-pai/model/pai_yoloxs.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox-pai/log/pai_yoloxs.json) |
-| PAI-YOLOXs-ASFF | [yoloxs_pai_asff_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/pai_yoloxs_asff_8xb16_300e_coco.py) | 21M | 0.87ms | 42.8 | 61.8 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox-pai/model/pai_yoloxs_asff.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox-pai/log/pai_yoloxs_asff.json) |
+| Algorithm | Config | Params | SpeedV100<br/>fp16 b32 | mAPval<br/>0.5:0.95 | APval<br/>50 | Download |
+| --------------------- | ------------------------------------------------------------ | ------ | --------------------------------------- | ----------------------------------- | ---------------------------- | ------------------------------------------------------------ |
+| YOLOX-s | [yolox_s_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_s_8xb16_300e_coco.py) | 9M | 0.68ms | 40.0 | 58.9 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_s_bs16_lr002/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_s_bs16_lr002/log.txt) |
+| PAI-YOLOXs | [yoloxs_pai_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/pai_yoloxs_8xb16_300e_coco.py) | 16M | 0.71ms | 41.4 | 60.0 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox-pai/model/pai_yoloxs.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox-pai/log/pai_yoloxs.json) |
+| PAI-YOLOXs-ASFF | [yoloxs_pai_asff_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/pai_yoloxs_asff_8xb16_300e_coco.py) | 21M | 0.87ms | 42.8 | 61.8 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox-pai/model/pai_yoloxs_asff.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox-pai/log/pai_yoloxs_asff.json) |
| PAI-YOLOXs-ASFF-TOOD3 | [yoloxs_pai_asff_tood3_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/pai_yoloxs_asff_tood3_8xb16_300e_coco.py) | 24M | 1.15ms | 43.9 | 62.1 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox-pai/model/pai_yoloxs_asff_tood3.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox-pai/log/pai_yoloxs_asff_tood3.json) |
-| YOLOX-m | [yolox_m_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_m_8xb16_300e_coco.py) | 25M | 1.52ms | 46.3 | 64.9 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_m_bs16_lr002/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_m_bs16_lr002/log.txt) |
-| YOLOX-l | [yolox_l_8xb8_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_m_8xb8_300e_coco.py) | 54M | 2.47ms | 48.9 | 67.5 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_l_bs8_lr001/epoch_290.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_l_bs8_lr001/log.txt) |
-| YOLOX-x | [yolox_x_8xb8_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_x_8xb8_300e_coco.py) | 99M | 4.74ms | 50.9 | 69.2 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_x_bs8_lr001/epoch_290.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_x_bs8_lr001/log.txt) |
-| YOLOX-tiny | [yolox_tiny_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_tiny_8xb16_300e_coco.py) | 5M | 0.28ms | 31.5 | 49.2 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_tiny_bs16_lr002/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_tiny_bs16_lr002/log.txt) |
-| YOLOX-nano | [yolox_nano_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_tiny_8xb16_300e_coco.py) | 2.2M | 0.19ms | 26.5 | 42.6 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_nano_bs16_lr002/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_nano_bs16_lr002/log.txt) |
+| YOLOX-m | [yolox_m_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_m_8xb16_300e_coco.py) | 25M | 1.52ms | 46.3 | 64.9 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_m_bs16_lr002/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_m_bs16_lr002/log.txt) |
+| YOLOX-l | [yolox_l_8xb8_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_m_8xb8_300e_coco.py) | 54M | 2.47ms | 48.9 | 67.5 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_l_bs8_lr001/epoch_290.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_l_bs8_lr001/log.txt) |
+| YOLOX-x | [yolox_x_8xb8_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_x_8xb8_300e_coco.py) | 99M | 4.74ms | 50.9 | 69.2 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_x_bs8_lr001/epoch_290.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_x_bs8_lr001/log.txt) |
+| YOLOX-tiny | [yolox_tiny_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_tiny_8xb16_300e_coco.py) | 5M | 0.28ms | 31.5 | 49.2 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_tiny_bs16_lr002/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_tiny_bs16_lr002/log.txt) |
+| YOLOX-nano | [yolox_nano_8xb16_300e_coco](https://github.com/alibaba/EasyCV/tree/master/configs/detection/yolox/yolox_tiny_8xb16_300e_coco.py) | 2.2M | 0.19ms | 26.5 | 42.6 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_nano_bs16_lr002/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/yolox/yolox_nano_bs16_lr002/log.txt) |
## ViTDet
-
-| Algorithm | Config | Params<br/>(backbone/total) | inference time(V100)<br/>(ms/img) | bbox_mAPval<br/>0.5:0.95 | mask_mAPval<br/>0.5:0.95 | Download |
-| ---------- | ------------------------------------------------------------ | ------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
-| ViTDet_MaskRCNN | [vitdet_maskrcnn](https://github.com/alibaba/EasyCV/tree/master/configs/detection/vitdet/vitdet_mask_rcnn_100e.py) | 86M/111M | 138ms | 50.65 | 45.41 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/vitdet/vit_base/epoch_100.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/vitdet/vit_base/20220901_135827.log.json) |
+| Algorithm | Config | Params<br/>(backbone/total) | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | bbox_mAPval<br/>0.5:0.95 | mask_mAPval<br/>0.5:0.95 | Download |
+| ---------- | ------------------------------------------------------------ | ------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
+| ViTDet_MaskRCNN | [vitdet_maskrcnn](https://github.com/alibaba/EasyCV/tree/master/configs/detection/vitdet/vitdet_mask_rcnn_100e.py) | 86M/111M | 13.3 (fp16) | 138ms | 50.65 | 45.41 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/vitdet/vit_base/epoch_100.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/vitdet/vit_base/20220901_135827.log.json) |
## FCOS
-| Algorithm | Config | Params<br/>(backbone/total) | inference time(V100)<br/>(ms/img) | mAPval<br/>0.5:0.95 | APval<br/>50 | Download |
-| ---------- | ------------------------------------------------------------ | ------------------------ | --------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
-| FCOS-r50(caffe) | [fcos-r50](https://github.com/alibaba/EasyCV/tree/master/configs/detection/fcos/fcos_r50_caffe_1x_coco.py) | 23M/32M | 85.8ms | 38.58 | 57.18 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/fcos/epoch_12.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/fcos/20220621_121315.log.json) |
-| FCOS-r50(torch) | [fcos-r50](https://github.com/alibaba/EasyCV/tree/master/configs/detection/fcos/fcos_r50_torch_1x_coco.py) | 23M/32M | 105.3ms | 38.88 | 58.01 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/fcos/fcos_epoch_12.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/fcos/20220826_182628.log.json) |
+| Algorithm | Config | Params<br/>(backbone/total) | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | mAPval<br/>0.5:0.95 | APval<br/>50 | Download |
+| ---------- | ------------------------------------------------------------ | ------------------------ | --------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
+| FCOS-r50(caffe) | [fcos-r50](https://github.com/alibaba/EasyCV/tree/master/configs/detection/fcos/fcos_r50_caffe_1x_coco.py) | 23M/32M | 5.0 | 85.8ms | 38.58 | 57.18 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/fcos/epoch_12.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/fcos/20220621_121315.log.json) |
+| FCOS-r50(torch) | [fcos-r50](https://github.com/alibaba/EasyCV/tree/master/configs/detection/fcos/fcos_r50_torch_1x_coco.py) | 23M/32M | 4.0 (fp16) | 105.3ms | 38.88 | 58.01 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/fcos/fcos_epoch_12.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/fcos/20220826_182628.log.json) |
## DETR
-| Algorithm | Config | Params<br/>(backbone/total) | inference time(V100)<br/>(ms/img) | bbox_mAPval<br/>0.5:0.95 | APval<br/>50 | Download |
-| ---------- | ------------------------------------------------------------ | ------------------------ | --------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
-| DETR-r50 | [detr-r50](https://github.com/alibaba/EasyCV/tree/master/configs/detection/detr/detr_r50_8x2_150e_coco.py) | 23M/41M | 48.5ms | 39.92 | 60.52 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/detr/epoch_150.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/detr/20220609_101243.log.json) |
-| DAB-DETR-r50 | [dab-detr-r50](https://github.com/alibaba/EasyCV/tree/master/configs/detection/dab_detr/dab_detr_r50_8x2_50e_coco.py) | 23M/43M | 58.5ms | 42.52 | 63.03 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/dab_detr/dab_detr_epoch_50.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/dab_detr/20220610_122811.log.json) |
-| DN-DETR-r50 | [dab-detr-r50](https://github.com/alibaba/EasyCV/tree/master/configs/detection/dab_detr/dn_detr_r50_8x2_50e_coco.py) | 23M/43M | 58.5ms | 44.39 | 64.66 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/dn_detr/dn_detr_epoch_50.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/dn_detr/20220713_105127.log.json) |
+| Algorithm | Config | Params<br/>(backbone/total) | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | bbox_mAPval<br/>0.5:0.95 | APval<br/>50 | Download |
+| ---------- | ------------------------------------------------------------ | ------------------------ | --------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
+| DETR-r50 | [detr-r50](https://github.com/alibaba/EasyCV/tree/master/configs/detection/detr/detr_r50_8x2_150e_coco.py) | 23M/41M | 8.5 | 48.5ms | 39.92 | 60.52 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/detr/epoch_150.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/detr/20220609_101243.log.json) |
+| DAB-DETR-r50 | [dab-detr-r50](https://github.com/alibaba/EasyCV/tree/master/configs/detection/dab_detr/dab_detr_r50_8x2_50e_coco.py) | 23M/43M | 2.6 | 58.5ms | 42.52 | 63.03 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/dab_detr/dab_detr_epoch_50.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/dab_detr/20220610_122811.log.json) |
+| DN-DETR-r50 | [dab-detr-r50](https://github.com/alibaba/EasyCV/tree/master/configs/detection/dab_detr/dn_detr_r50_8x2_50e_coco.py) | 23M/43M | 7.8 | 58.5ms | 44.39 | 64.66 | [model](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/dn_detr/dn_detr_epoch_50.pth) - [log](https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/dn_detr/20220713_105127.log.json) |
## DINO
diff --git a/docs/source/model_zoo_seg.md b/docs/source/model_zoo_seg.md
index 97820feb..a1cdbb82 100644
--- a/docs/source/model_zoo_seg.md
+++ b/docs/source/model_zoo_seg.md
@@ -4,29 +4,29 @@
Pretrained on **Pascal VOC 2012 + Aug**.
-| Algorithm | Config | Params<br/>(backbone/total) | inference time(V100)<br/>(ms/img) | mIoU | Download |
-| ---------- | ------------------------------------------------------------ | ------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
-| fcn_r50_d8 | [fcn_r50-d8_512x512_8xb4_60e_voc12aug](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/fcn/fcn_r50-d8_512x512_8xb4_60e_voc12aug.py) | 23M/49M | 166ms | 69.01 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/fcn_r50/epoch_60.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/fcn_r50/20220525_203606.log.json) |
+| Algorithm | Config | Params<br/>(backbone/total) | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | mIoU | Download |
+| ---------- | ------------------------------------------------------------ | ------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
+| fcn_r50_d8 | [fcn_r50-d8_512x512_8xb4_60e_voc12aug](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/fcn/fcn_r50-d8_512x512_8xb4_60e_voc12aug.py) | 23M/49M | 19.8 | 166ms | 69.01 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/fcn_r50/epoch_60.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/fcn_r50/20220525_203606.log.json) |
## UperNet
Pretrained on **Pascal VOC 2012 + Aug**.
-
-| Algorithm | Config | Params<br/>(backbone/total) | inference time(V100)<br/>(ms/img) | mIoU | Download |
-| ---------- | ------------------------------------------------------------ | ------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
-| upernet_r50 | [upernet_r50_512x512_8xb4_60e_voc12aug](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/upernet/upernet_r50_512x512_8xb4_60e_voc12aug.py) | 23M/66M | 282.9ms | 76.59 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/upernet_r50/epoch_60.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/upernet_r50/20220706_114712.log.json) |
+| Algorithm | Config | Params<br/>(backbone/total) | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | mIoU | Download |
+| ---------- | ------------------------------------------------------------ | ------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
+| upernet_r50 | [upernet_r50_512x512_8xb4_60e_voc12aug](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/upernet/upernet_r50_512x512_8xb4_60e_voc12aug.py) | 23M/66M | 5.5 | 282.9ms | 76.59 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/upernet_r50/epoch_60.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/upernet_r50/20220706_114712.log.json) |
## Mask2former
### Instance Segmentation on COCO
-| Algorithm | Config | box MAP | Mask mAP | Download |
-| ---------- | ------------------------------------------------------------ | ------------------------ |----------|---------------------------------------------------------------------------- |
-| mask2former_r50 | [mask2former_r50_8xb2_e50_instance](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/mask2former/mask2former_r50_8xb2_e50_instance.py) | 46.09 | 43.26 |[model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/mask2former_r50_instance/epoch_50.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/mask2former_r50_instance/20220620_113639.log.json) |
+| Algorithm | Config | Train memory<br/>(GB) | box MAP | Mask mAP | Download |
+| ---------- | ------------------------------------------------------------ |----------|----------|----------|----------|
+| mask2former_r50 | [mask2former_r50_8xb2_e50_instance](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/mask2former/mask2former_r50_8xb2_e50_instance.py) | 18.8 | 46.09 | 43.26 |[model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/mask2former_r50_instance/epoch_50.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/mask2former_r50_instance/20220620_113639.log.json) |
### Panoptic Segmentation on COCO
-| Algorithm | Config | PQ | box MAP | Mask mAP | Download |
-| ---------- | ---------- | ------------------------------------------------------------ | ------------------------ |----------|---------------------------------------------------------------------------- |
-| mask2former_r50 | [mask2former_r50_8xb2_e50_panopatic](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/mask2former/mask2former_r50_8xb2_e50_panopatic.py) | 51.64 | 44.81 | 41.88 |[model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/mask2former_r50_panoptic/epoch_50.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/mask2former_r50_panoptic/20220629_170721.log.json) |
+
+| Algorithm | Config | Train memory<br/>(GB) | PQ | box MAP | Mask mAP | Download |
+| ---------- | ---------- | ------------------------------------------------------------ | ------------------------ |----------|---------------------------------------------------------------------------- |---------------------------------------------------------------------------- |
+| mask2former_r50 | [mask2former_r50_8xb2_e50_panopatic](https://github.com/alibaba/EasyCV/tree/master/configs/segmentation/mask2former/mask2former_r50_8xb2_e50_panopatic.py) | 18.8 | 51.64 | 44.81 | 41.88 |[model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/mask2former_r50_panoptic/epoch_50.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/segmentation/mask2former_r50_panoptic/20220629_170721.log.json) |
## SegFormer
diff --git a/docs/source/model_zoo_ssl.md b/docs/source/model_zoo_ssl.md
index 81650e43..c6a1ef30 100644
--- a/docs/source/model_zoo_ssl.md
+++ b/docs/source/model_zoo_ssl.md
@@ -5,19 +5,19 @@
Pretrained on **ImageNet** dataset.
-| Config | Backbone | Params<br/>(backbone/total) | Flops | inference time(V100)<br/>(ms/img) | Epochs | Download |
-| ------------------------------------------------------------ | -------- | -------------------------- | ----- | -------------------------------- | ------ | ------------------------------------------------------------ |
-| [mae_vit_base_patch16_8xb64_400e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/mae/mae_vit_base_patch16_8xb64_400e.py) | ViT-B/16 | 85M/111M | 9.8G | 8.03 | 400 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-b-400/pretrain_400.pth) |
-| [mae_vit_base_patch16_8xb64_1600e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/mae/mae_vit_base_patch16_8xb64_1600e.py) | ViT-B/16 | 85M/111M | 9.8G | 8.03 | 1600 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-b-1600/pretrain_1600.pth) |
-| [mae_vit_large_patch16_8xb32_1600e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/mae/mae_vit_large_patch16_8xb32_1600e.py) | ViT-L/16 | 303M/329M | 20.8G | 16.30 | 1600 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-l-1600/pretrain_1600.pth) |
+| Config | Backbone | Params<br/>(backbone/total) | Train memory<br/>(GB) | Flops | inference time(V100)<br/>(ms/img) | Epochs | Download |
+| ------------------------------------------------------------ | -------- | -------------------------- | ------------------ | ----- | -------------------------------- | ------ | ------------------------------------------------------------ |
+| [mae_vit_base_patch16_8xb64_400e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/mae/mae_vit_base_patch16_8xb64_400e.py) | ViT-B/16 | 85M/111M | 9.5 | 9.8G | 8.03 | 400 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-b-400/pretrain_400.pth) |
+| [mae_vit_base_patch16_8xb64_1600e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/mae/mae_vit_base_patch16_8xb64_1600e.py) | ViT-B/16 | 85M/111M | 9.5 | 9.8G | 8.03 | 1600 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-b-1600/pretrain_1600.pth) |
+| [mae_vit_large_patch16_8xb32_1600e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/mae/mae_vit_large_patch16_8xb32_1600e.py) | ViT-L/16 | 303M/329M | 11.3 | 20.8G | 16.30 | 1600 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mae/vit-l-1600/pretrain_1600.pth) |
### Fast ConvMAE
Pretrained on **ImageNet** dataset.
-| Config | Backbone | Params<br/>(backbone/total) | Flops | inference time(V100)<br/>(ms/img) | Epochs | Download |
-| ------------------------------------------------------------ | --------------- | --------------------------- | ----- | --------------------------------- | ------ | ------------------------------------------------------------ |
-| [fast_convmae_vit_base_patch16_8xb64_50e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/fast_convmae/fast_convmae_vit_base_patch16_8xb64_50e.py) | ConvViT-B/16 | 88M/115M | 45.1G | 6.88 | 50 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/FastConvMAE/pretrained/epoch_50.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/FastConvMAE/pretrained/20220617_110501.log.json) |
+| Config | Backbone | Params<br/>(backbone/total) | Train memory<br/>(GB) | Flops | inference time(V100)<br/>(ms/img) | Total train time | Epochs | Download |
+| ------------------------------------------------------------ | --------------- | --------------------------- | ----- | --------------------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
+| [fast_convmae_vit_base_patch16_8xb64_50e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/fast_convmae/fast_convmae_vit_base_patch16_8xb64_50e.py) | ConvViT-B/16 | 88M/115M | 30.3 | 45.1G | 6.88 | 20h<br/>(8*A100) | 50 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/FastConvMAE/pretrained/epoch_50.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/FastConvMAE/pretrained/20220617_110501.log.json) |
> The FLOPs of Fast ConvMAE are about four times those of MAE: the MAE mask keeps only 25% of the tokens in each forward pass, while Fast ConvMAE adopts a complementary strategy that splits the mask into four complementary parts, each covering 25% of the tokens. This is equivalent to learning from four samples per forward pass, achieving roughly 4x the learning effect.
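A minimal sketch of the complementary masking idea described above, assuming 196 patch tokens split into four parts; the function name and shapes here are illustrative, not EasyCV's actual implementation:

```python
import torch

def complementary_masks(num_tokens: int, parts: int = 4, generator=None):
    """Split the token indices into `parts` disjoint visible sets that
    together cover every token exactly once."""
    perm = torch.randperm(num_tokens, generator=generator)
    chunk = num_tokens // parts
    masks = []
    for p in range(parts):
        visible = perm[p * chunk:(p + 1) * chunk]
        mask = torch.zeros(num_tokens, dtype=torch.bool)
        mask[visible] = True  # True marks the 25% of tokens kept in this forward
        masks.append(mask)
    return masks

# e.g. a 224x224 image with 16x16 patches -> 196 tokens
masks = complementary_masks(196)
assert sum(int(m.sum()) for m in masks) == 196  # each token is visible exactly once
```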
@@ -25,34 +25,34 @@ Pretrained on **ImageNet** dataset.
Pretrained on **ImageNet** dataset.
-| Config | Backbone | Params<br/>(backbone/total) | inference time(V100)<br/>(ms/img) | Epochs | Download |
-| ------------------------------------------------------------ | --------- | --------------------------- | --------------------------------- | ------ | ------------------------------------------------------------ |
-| [dino_deit_small_p16_8xb32_100e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/dino/dino_deit_small_p16_8xb32_100e_tfrecord.py) | DeiT-S/16 | 21M/88M | 6.17 | 100 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/dino_deit_small/epoch_100.pth) |
+| Config | Backbone | Params<br/>(backbone/total) | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | Epochs | Download |
+| ------------------------------------------------------------ | --------- | --------------------------- | ------------------ | --------------------------------- | ------ | ------------------------------------------------------------ |
+| [dino_deit_small_p16_8xb32_100e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/dino/dino_deit_small_p16_8xb32_100e_tfrecord.py) | DeiT-S/16 | 21M/88M | 10.5 | 6.17 | 100 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/dino_deit_small/epoch_100.pth) |
### MoBY
Pretrained on **ImageNet** dataset.
-| Config | Backbone | Params<br/>(backbone/total) | Flops | inference time(V100)<br/>(ms/img) | Epochs | Download |
-| ------------------------------------------------------------ | --------- | --------------------------- | ----- | --------------------------------- | ------ | ------------------------------------------------------------ |
-| [moby_deit_small_p16_4xb128_300e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/moby/moby_deit_small_p16_4xb128_300e_tfrecord.py) | DeiT-S/16 | 21M/26M | 18.6G | 6.17 | 300 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/moby_deit_small_p16/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/moby_deit_small_p16/log.txt) |
-| [moby_swin_tiny_8xb64_300e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/moby/moby_dynamic_swin_tiny_8xb64_300e_tfrecord.py) | Swin-T | 27M/33M | 18.1G | 9.74 | 300 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/moby_dynamic_swin_tiny/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/moby_dynamic_swin_tiny/log.txt) |
+| Config | Backbone | Params<br/>(backbone/total) | Flops | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | Epochs | Download |
+| ------------------------------------------------------------ | --------- | --------------------------- | ----- | ------------------ | --------------------------------- | ------ | ------------------------------------------------------------ |
+| [moby_deit_small_p16_4xb128_300e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/moby/moby_deit_small_p16_4xb128_300e_tfrecord.py) | DeiT-S/16 | 21M/26M | 18.6G | 21.4 | 6.17 | 300 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/moby_deit_small_p16/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/moby_deit_small_p16/log.txt) |
+| [moby_swin_tiny_8xb64_300e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/moby/moby_dynamic_swin_tiny_8xb64_300e_tfrecord.py) | Swin-T | 27M/33M | 18.1G | 16.1 | 9.74 | 300 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/moby_dynamic_swin_tiny/epoch_300.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/moby_dynamic_swin_tiny/log.txt) |
### MoCo V2
Pretrained on **ImageNet** dataset.
-| Config | Backbone | Params<br/>(backbone/total) | Flops | inference time(V100)<br/>(ms/img) | Epochs | Download |
-| ------------------------------------------------------------ | -------- | --------------------------- | ----- | --------------------------------- | ------ | ------------------------------------------------------------ |
-| [mocov2_resnet50_8xb32_200e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/mocov2/mocov2_rn50_8xb32_200e_tfrecord.py) | ResNet50 | 23M/28M | 8.2G | 8.59 | 200 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mocov2_r50/epoch_200.pth) |
+| Config | Backbone | Params<br/>(backbone/total) | Flops | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | Epochs | Download |
+| ------------------------------------------------------------ | -------- | --------------------------- | ----- | ------------------ | --------------------------------- | ------ | ------------------------------------------------------------ |
+| [mocov2_resnet50_8xb32_200e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/mocov2/mocov2_rn50_8xb32_200e_tfrecord.py) | ResNet50 | 23M/28M | 8.2G | 5.4 | 8.59 | 200 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/mocov2_r50/epoch_200.pth) |
### SwAV
Pretrained on **ImageNet** dataset.
-| Config | Backbone | Params<br/>(backbone/total) | Flops | inference time(V100)<br/>(ms/img) | Epochs | Download |
-| ------------------------------------------------------------ | -------- | --------------------------- | ----- | --------------------------------- | ------ | ------------------------------------------------------------ |
-| [swav_resnet50_8xb32_200e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/swav/swav_rn50_8xb32_200e_tfrecord.py) | ResNet50 | 23M/28M | 12.9G | 8.59 | 200 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/swav_r50/epoch_200.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/swav_r50/log.txt) |
+| Config | Backbone | Params<br/>(backbone/total) | Flops | Train memory<br/>(GB) | inference time(V100)<br/>(ms/img) | Epochs | Download |
+| ------------------------------------------------------------ | -------- | --------------------------- | ----- | ------------------ | --------------------------------- | ------ | ------------------------------------------------------------ |
+| [swav_resnet50_8xb32_200e](https://github.com/alibaba/EasyCV/tree/master/configs/selfsup/swav/swav_rn50_8xb32_200e_tfrecord.py) | ResNet50 | 23M/28M | 12.9G | 11.3 | 8.59 | 200 | [model](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/swav_r50/epoch_200.pth) - [log](http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/selfsup/swav_r50/log.txt) |
## Benchmarks
From bb53e066be94f21d651ad9dd8fff2b9db1810519 Mon Sep 17 00:00:00 2001
From: yhq
Date: Mon, 19 Sep 2022 19:52:21 +0800
Subject: [PATCH 8/9] fix missing vit model (#197)
* fix missing vit model
* set pretrained to False when exporting the cls model
---
...ge_patch16_8xb16_50e_lrdecay075_fintune.py | 3 +
.../tutorials/EasyCV图像自监督训练-MAE.ipynb | 55 ++++++++++++++++++-
easycv/apis/export.py | 3 +
easycv/models/modelzoo.py | 6 ++
4 files changed, 64 insertions(+), 3 deletions(-)
diff --git a/benchmarks/selfsup/classification/imagenet/mae_vit_large_patch16_8xb16_50e_lrdecay075_fintune.py b/benchmarks/selfsup/classification/imagenet/mae_vit_large_patch16_8xb16_50e_lrdecay075_fintune.py
index 31694589..fa008b3e 100644
--- a/benchmarks/selfsup/classification/imagenet/mae_vit_large_patch16_8xb16_50e_lrdecay075_fintune.py
+++ b/benchmarks/selfsup/classification/imagenet/mae_vit_large_patch16_8xb16_50e_lrdecay075_fintune.py
@@ -157,3 +157,6 @@ checkpoint_config = dict(interval=10)
# runtime settings
total_epochs = 50
+
+# export config
+export = dict(export_neck=True)
diff --git a/docs/source/tutorials/EasyCV图像自监督训练-MAE.ipynb b/docs/source/tutorials/EasyCV图像自监督训练-MAE.ipynb
index 71024860..f96f0b52 100644
--- a/docs/source/tutorials/EasyCV图像自监督训练-MAE.ipynb
+++ b/docs/source/tutorials/EasyCV图像自监督训练-MAE.ipynb
@@ -262,7 +262,7 @@
"state_dict = torch.load(weight_path)['state_dict']\n",
"state_dict_out = {}\n",
"for key in state_dict:\n",
- " state_dict_out[key.replace('encoder.','')] = state_dict[key]\n",
+ " state_dict_out['model.' + key.replace('encoder.','')] = state_dict[key]\n",
"torch.save(state_dict_out,weight_path)"
]
},
@@ -324,7 +324,7 @@
"outputs": [],
"source": [
"!python -m torch.distributed.launch --nproc_per_node=1 --master_port=29930 \\\n",
- "/home/pai/lib/python3.6/site-packages/easycv/tools/train.py mae_vit_base_patch16_8xb64_100e_lrdecay065_fintune.py --work_dir work_dir/selfsup/jpg/mae --launcher pytorch"
+ "/home/pai/lib/python3.6/site-packages/easycv/tools/train.py mae_vit_base_patch16_8xb64_100e_lrdecay065_fintune.py --work_dir work_dir/selfsup/jpg/mae_fintune --launcher pytorch"
]
},
{
@@ -333,7 +333,56 @@
"metadata": {},
"source": [
"### 预测\n",
- "参考EasyCV图像分类的demo,对训练好的模型导出并预测"
+ "对训练好的模型导出并预测"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4271c852",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "! python -m easycv.tools.export mae_vit_base_patch16_8xb64_100e_lrdecay065_fintune.py work_dir/selfsup/jpg/mae_fintune/ClsEvaluator_neck_top1_best.pth work_dir/selfsup/jpg/mae_fintune/best_export.pth"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2cc9e6fc",
+ "metadata": {},
+ "source": [
+ "下载测试图片和标签文件"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "973d5bd4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "! wget http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/cifar10/qince_data/predict/aeroplane_s_000004.png\n",
+ "! wget http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/release/doc/easycv/configs/selfsup/mae/label_map.txt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5a5a3632",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import cv2\n",
+ "from easycv.predictors.classifier import TorchClassifier\n",
+ "\n",
+ "output_ckpt = 'work_dir/selfsup/jpg/mae_fintune/best_export.pth'\n",
+ "tcls = TorchClassifier(output_ckpt, topk=1, label_map_path='label_map.txt')\n",
+ "\n",
+ "img = cv2.imread('aeroplane_s_000004.png')\n",
+ "# input image should be RGB order\n",
+ "img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n",
+ "output = tcls.predict([img])\n",
+ "print(output)"
]
}
],
diff --git a/easycv/apis/export.py b/easycv/apis/export.py
index c2633acb..9a0b9165 100644
--- a/easycv/apis/export.py
+++ b/easycv/apis/export.py
@@ -107,6 +107,9 @@ def _export_cls(model, cfg, filename):
backbone=replace_syncbn(cfg.model.backbone),
)
+ # avoid loading the pretrained model
+ model_config['pretrained'] = False
+
if export_neck:
if hasattr(cfg.model, 'neck'):
model_config['neck'] = cfg.model.neck
diff --git a/easycv/models/modelzoo.py b/easycv/models/modelzoo.py
index 0680dd21..58f005c4 100644
--- a/easycv/models/modelzoo.py
+++ b/easycv/models/modelzoo.py
@@ -253,4 +253,10 @@ timm_models = {
'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/pretrained_models/timm/swin_small_patch4_window7_224_statedict.pth',
'dynamic_swin_tiny_p4_w7_224':
'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/pretrained_models/timm/swin_tiny_patch4_window7_224_statedict.pth',
+
+ # dynamic_vit:
+ 'dynamic_vit_base_p16':
+ 'https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/classification/timm/vit/B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_224.npz',
+ 'dynamic_vit_large_p16':
+ 'https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/classification/timm/vit/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_224.npz',
}
From 5dfe7b289829ef3eab2345279797c8b360faa813 Mon Sep 17 00:00:00 2001
From: Cathy0908 <30484308+Cathy0908@users.noreply.github.com>
Date: Tue, 20 Sep 2022 10:04:42 +0800
Subject: [PATCH 9/9] update some predictors, support batch inference (#195)
update some predictors, support batch inference
---
.gitignore | 3 -
.../resnet/resnet50_b32x8_100e_jpg.py | 10 +
.../segmentation/data_sources/base.py | 2 +-
easycv/file/image.py | 38 ++-
easycv/file/utils.py | 3 +-
easycv/predictors/__init__.py | 3 +-
easycv/predictors/base.py | 97 ++++--
easycv/predictors/classifier.py | 115 ++++++-
easycv/predictors/detector.py | 250 ++++-----------
easycv/predictors/face_keypoints_predictor.py | 9 +-
easycv/predictors/hand_keypoints_predictor.py | 149 +++++----
easycv/predictors/segmentation.py | 287 +++++-------------
easycv/utils/checkpoint.py | 44 ++-
easycv/utils/constant.py | 4 +-
easycv/utils/misc.py | 36 ++-
.../segmentation/test_seg_raw_dataset.py | 2 -
tests/predictors/test_classifier.py | 51 +++-
tests/predictors/test_detector.py | 51 +++-
tests/predictors/test_detector_blade.py | 11 +-
.../test_face_keypoints_predictor.py | 18 +-
.../test_hand_keypoints_predictor.py | 31 ++
tests/predictors/test_segmentation.py | 63 ++--
tests/predictors/test_segmentor.py | 48 ---
tests/ut_config.py | 8 +-
thirdparty/u2sod/sodpredictor.py | 5 +-
25 files changed, 671 insertions(+), 667 deletions(-)
delete mode 100644 tests/predictors/test_segmentor.py
diff --git a/.gitignore b/.gitignore
index 1828f7e8..f63cdea2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -137,6 +137,3 @@ pai_jobs/easycv/resources/
*.tar.gz
thirdparty/test
scripts/test
-
-# easycv default cache dir
-.easycv_cache
diff --git a/configs/classification/imagenet/resnet/resnet50_b32x8_100e_jpg.py b/configs/classification/imagenet/resnet/resnet50_b32x8_100e_jpg.py
index 03124f20..a81e4ae2 100644
--- a/configs/classification/imagenet/resnet/resnet50_b32x8_100e_jpg.py
+++ b/configs/classification/imagenet/resnet/resnet50_b32x8_100e_jpg.py
@@ -86,3 +86,13 @@ checkpoint_config = dict(interval=10)
# runtime settings
total_epochs = 100
+
+predict = dict(
+ type='ClassificationPredictor',
+ pipelines=[
+ dict(type='Resize', size=256),
+ dict(type='CenterCrop', size=224),
+ dict(type='ToTensor'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Collect', keys=['img'])
+ ])
diff --git a/easycv/datasets/segmentation/data_sources/base.py b/easycv/datasets/segmentation/data_sources/base.py
index b8dc3673..a893932e 100644
--- a/easycv/datasets/segmentation/data_sources/base.py
+++ b/easycv/datasets/segmentation/data_sources/base.py
@@ -27,7 +27,7 @@ def load_image(img_path):
def load_seg_map(seg_path, reduce_zero_label):
- gt_semantic_seg = _load_img(seg_path, mode='RGB')
+ gt_semantic_seg = _load_img(seg_path, mode='P')
# reduce zero_label
if reduce_zero_label:
# avoid using underflow conversion
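For context on the `mode='P'` fix above: VOC-style segmentation masks are palette PNGs whose pixel values are already class indices, so loading them as RGB yields color triplets instead of labels. The sketch below, using a hypothetical `mask.png`, shows the difference and the usual reduce-zero-label convention the comment refers to; it mirrors the common mmseg-style handling rather than the exact EasyCV code.

```python
import numpy as np
from PIL import Image

# 'P' mode keeps the palette indices, i.e. the class ids themselves.
mask_p = np.asarray(Image.open('mask.png'))                   # (H, W) class indices
mask_rgb = np.asarray(Image.open('mask.png').convert('RGB'))  # (H, W, 3) colors only

# Typical reduce-zero-label handling: shift labels down by one and map the
# old background (0) to the ignore index, avoiding uint8 underflow.
seg = mask_p.astype(np.uint8)
seg[seg == 0] = 255
seg = seg - 1
seg[seg == 254] = 255
```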
diff --git a/easycv/file/image.py b/easycv/file/image.py
index a6253427..3a1fff90 100644
--- a/easycv/file/image.py
+++ b/easycv/file/image.py
@@ -1,4 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
+import io
import logging
import time
@@ -6,10 +7,10 @@ import cv2
import numpy as np
from PIL import Image
-from easycv.file import io
+from easycv import file
from easycv.framework.errors import IOError
from easycv.utils.constant import MAX_READ_IMAGE_TRY_TIMES
-from .utils import is_oss_path
+from .utils import is_oss_path, is_url_path
def load_image(img_path, mode='BGR', max_try_times=MAX_READ_IMAGE_TRY_TIMES):
@@ -20,16 +21,31 @@ def load_image(img_path, mode='BGR', max_try_times=MAX_READ_IMAGE_TRY_TIMES):
img = None
while try_cnt < max_try_times:
try:
- with io.open(img_path, 'rb') as infile:
- # cv2.imdecode may corrupt when the img is broken
- image = Image.open(infile) # RGB
+ if is_url_path(img_path):
+ from mmcv.fileio.file_client import HTTPBackend
+ client = HTTPBackend()
+ img_bytes = client.get(img_path)
+ buff = io.BytesIO(img_bytes)
+ image = Image.open(buff)
+ if mode.upper() != 'BGR' and image.mode.upper() != mode.upper(
+ ):
+ image = image.convert(mode.upper())
img = np.asarray(image, dtype=np.uint8)
- if mode.upper() == 'BGR':
- img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
- assert mode.upper() in ['RGB', 'BGR'
- ], 'Only support `RGB` and `BGR` mode!'
- assert img is not None
- break
+ else:
+ with file.io.open(img_path, 'rb') as infile:
+ # cv2.imdecode may corrupt when the img is broken
+ image = Image.open(infile)
+ if mode.upper() != 'BGR' and image.mode.upper(
+ ) != mode.upper():
+ image = image.convert(mode.upper())
+ img = np.asarray(image, dtype=np.uint8)
+
+ if mode.upper() == 'BGR':
+ if image.mode.upper() != 'RGB':
+ image = image.convert('RGB')
+ img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
+ assert img is not None
+ break
except Exception as e:
logging.error(e)
logging.warning('Read file {} fault, try count : {}'.format(
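With the change above, `load_image` also handles http/https inputs (read through mmcv's `HTTPBackend`) and converts to the requested mode before the BGR swap. A brief usage sketch; both paths are placeholders:

```python
from easycv.file.image import load_image

img_bgr = load_image('data/demo.jpg', mode='BGR')                # local or OSS path
img_rgb = load_image('http://example.com/demo.jpg', mode='RGB')  # URL path
print(img_bgr.shape, img_rgb.shape)
```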
diff --git a/easycv/file/utils.py b/easycv/file/utils.py
index 49920611..f943c725 100644
--- a/easycv/file/utils.py
+++ b/easycv/file/utils.py
@@ -13,7 +13,7 @@ from tqdm import tqdm
from easycv.framework.errors import ValueError
OSS_PREFIX = 'oss://'
-URL_PREFIX = 'https://'
+URL_PREFIX = ('https://', 'http://')
def create_namedtuple(**kwargs):
@@ -33,6 +33,7 @@ def url_path_exists(url):
urllib.request.urlopen(url).code
except Exception as err:
print(err)
+ return False
return True
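Making `URL_PREFIX` a tuple pairs naturally with `str.startswith`, which accepts a tuple of prefixes. A sketch of how such a check presumably looks; `is_url_path` itself is not shown in this hunk, so the helper below is illustrative only:

```python
OSS_PREFIX = 'oss://'
URL_PREFIX = ('https://', 'http://')

def is_url_path(path: str) -> bool:
    # startswith with a tuple matches any of the listed prefixes.
    return path.startswith(URL_PREFIX)

assert is_url_path('http://example.com/a.jpg')
assert is_url_path('https://example.com/a.jpg')
assert not is_url_path('oss://bucket/a.jpg')
```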
diff --git a/easycv/predictors/__init__.py b/easycv/predictors/__init__.py
index 3fe86936..2d38f2f2 100644
--- a/easycv/predictors/__init__.py
+++ b/easycv/predictors/__init__.py
@@ -9,5 +9,4 @@ from .feature_extractor import (TorchFaceAttrExtractor,
from .hand_keypoints_predictor import HandKeypointsPredictor
from .pose_predictor import (TorchPoseTopDownPredictor,
TorchPoseTopDownPredictorWithDetector)
-from .segmentation import (Mask2formerPredictor, SegFormerPredictor,
- SegmentationPredictor)
+from .segmentation import Mask2formerPredictor, SegmentationPredictor
diff --git a/easycv/predictors/base.py b/easycv/predictors/base.py
index d65b1bb6..5b36f2fd 100644
--- a/easycv/predictors/base.py
+++ b/easycv/predictors/base.py
@@ -1,19 +1,23 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
+import json
import os
import pickle
+import cv2
import numpy as np
import torch
from mmcv.parallel import collate, scatter_kwargs
from PIL import Image
+from torch.hub import load_state_dict_from_url
from torchvision.transforms import Compose
from easycv.datasets.registry import PIPELINES
from easycv.file import io
+from easycv.file.utils import is_url_path
from easycv.framework.errors import ValueError
from easycv.models.builder import build_model
from easycv.utils.checkpoint import load_checkpoint
-from easycv.utils.config_tools import mmcv_config_fromfile
+from easycv.utils.config_tools import Config, mmcv_config_fromfile
from easycv.utils.constant import CACHE_DIR
from easycv.utils.mmlab_utils import (dynamic_adapt_for_mmlab,
remove_adapt_for_mmlab)
@@ -107,7 +111,9 @@ class PredictorV2(object):
device (str): Support 'cuda' or 'cpu', if is None, detect device automatically.
save_results (bool): Whether to save predict results.
save_path (str): File path for saving results, only valid when `save_results` is True.
+ pipelines (list[dict]): Data pipeline configs.
"""
+ INPUT_IMAGE_MODE = 'BGR' # the image mode into the model
def __init__(self,
model_path,
@@ -116,30 +122,51 @@ class PredictorV2(object):
device=None,
save_results=False,
save_path=None,
- mode='rgb',
+ pipelines=None,
*args,
**kwargs):
self.model_path = model_path
self.batch_size = batch_size
self.save_results = save_results
self.save_path = save_path
+ self.config_file = config_file
if self.save_results:
assert self.save_path is not None
self.device = device
if self.device is None:
self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
- self.cfg = None
if config_file is not None:
if isinstance(config_file, str):
self.cfg = mmcv_config_fromfile(config_file)
else:
self.cfg = config_file
+ else:
+ self.cfg = self._load_cfg_from_ckpt(self.model_path)
+
+ if self.cfg is None:
+ raise ValueError('Please provide "config_file"!')
self.model = self.prepare_model()
+ self.pipelines = pipelines
self.processor = self.build_processor()
self._load_op = None
- self.mode = mode
+
+ def _load_cfg_from_ckpt(self, model_path):
+ if is_url_path(model_path):
+ ckpt = load_state_dict_from_url(model_path)
+ else:
+ with io.open(model_path, 'rb') as infile:
+ ckpt = torch.load(infile, map_location='cpu')
+
+ cfg = None
+ if 'meta' in ckpt and 'config' in ckpt['meta']:
+ cfg = ckpt['meta']['config']
+ if isinstance(cfg, dict):
+ cfg = Config(cfg)
+ elif isinstance(cfg, str):
+ cfg = Config(json.loads(cfg))
+ return cfg
def prepare_model(self):
"""Build model from config file by default.
@@ -152,8 +179,6 @@ class PredictorV2(object):
return model
def _build_model(self):
- if self.cfg is None:
- raise ValueError('Please provide "config_file"!')
# Use mmdet model
dynamic_adapt_for_mmlab(self.cfg)
model = build_model(self.cfg.model)
@@ -165,16 +190,15 @@ class PredictorV2(object):
"""Build processor to process loaded input.
If you need custom preprocessing ops, you need to reimplement it.
"""
- if self.cfg is None:
- pipeline = []
+ if self.pipelines is not None:
+ pipelines = self.pipelines
else:
- pipeline = [
- build_from_cfg(p, PIPELINES)
- for p in self.cfg.get('test_pipeline', [])
- ]
+ pipelines = self.cfg.get('test_pipeline', [])
+
+ pipelines = [build_from_cfg(p, PIPELINES) for p in pipelines]
from easycv.datasets.shared.pipelines.transforms import Compose
- processor = Compose(pipeline)
+ processor = Compose(pipelines)
return processor
def _load_input(self, input):
@@ -190,10 +214,13 @@ class PredictorV2(object):
}
"""
if self._load_op is None:
- load_cfg = dict(type='LoadImage', mode=self.mode)
+ load_cfg = dict(type='LoadImage', mode=self.INPUT_IMAGE_MODE)
self._load_op = build_from_cfg(load_cfg, PIPELINES)
if not isinstance(input, str):
+ if isinstance(input, np.ndarray):
+ # Only support RGB mode if input is np.ndarray.
+ input = cv2.cvtColor(input, cv2.COLOR_RGB2BGR)
sample = self._load_op({'img': input})
else:
sample = self._load_op({'filename': input})
@@ -229,8 +256,32 @@ class PredictorV2(object):
return outputs
def postprocess(self, inputs, *args, **kwargs):
- """Process model outputs.
- If you need add some processing ops to process model outputs, you need to reimplement it.
+ """Process model batch outputs.
+ """
+ outputs = []
+ out_i = {}
+ batch_size = 1
+ # get current batch size
+ for k, batch_v in inputs.items():
+ if batch_v is not None:
+ batch_size = len(batch_v)
+ break
+
+ for i in range(batch_size):
+ for k, batch_v in inputs.items():
+ if batch_v is not None:
+ out_i[k] = batch_v[i]
+ else:
+ out_i[k] = None
+
+ out_i = self.postprocess_single(out_i)
+ outputs.append(out_i)
+
+ return outputs
+
+ def postprocess_single(self, inputs):
+ """Process outputs of single sample.
+ If you need add some processing ops, you need to reimplement it.
"""
return inputs
@@ -260,16 +311,22 @@ class PredictorV2(object):
results_list = []
for i in range(0, len(inputs), self.batch_size):
- batch = inputs[i:max(len(inputs) - 1, i + self.batch_size)]
+ batch = inputs[i:min(len(inputs), i + self.batch_size)]
batch_outputs = self.preprocess(batch)
batch_outputs = self.forward(batch_outputs)
results = self.postprocess(batch_outputs)
+ assert len(results) == len(
+ batch), f'Mismatch size {len(results)} != {len(batch)}'
if keep_inputs:
- results = {'inputs': batch, 'results': results}
+ for i in range(len(batch)):
+ results[i].update({'inputs': batch[i]})
# if dump, the outputs will not be added to the return value to prevent taking up too much memory
if self.save_results:
- self.dump([results], self.save_path, mode='ab+')
+ self.dump(results, self.save_path, mode='ab+')
else:
- results_list.append(results)
+ if isinstance(results, list):
+ results_list.extend(results)
+ else:
+ results_list.append(results)
return results_list
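The slicing fix above is easiest to see on a toy example: the old upper bound `max(len(inputs) - 1, i + batch_size)` produced oversized, overlapping batches, while `min(len(inputs), i + batch_size)` yields proper chunks.

```python
inputs = list(range(10))
batch_size = 4

# Old bound: max(len(inputs) - 1, i + batch_size) -> first batch gets 9 items.
old = [inputs[i:max(len(inputs) - 1, i + batch_size)]
       for i in range(0, len(inputs), batch_size)]
# New bound: min(len(inputs), i + batch_size) -> batches of 4, 4, 2.
new = [inputs[i:min(len(inputs), i + batch_size)]
       for i in range(0, len(inputs), batch_size)]

print([len(b) for b in old])  # [9, 5, 2] -- overlapping, wrong sizes
print([len(b) for b in new])  # [4, 4, 2]
```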
diff --git a/easycv/predictors/classifier.py b/easycv/predictors/classifier.py
index e29d6736..a788c354 100644
--- a/easycv/predictors/classifier.py
+++ b/easycv/predictors/classifier.py
@@ -3,17 +3,130 @@ import math
import numpy as np
import torch
+from PIL import Image, ImageFile
+from easycv.file import io
from easycv.framework.errors import ValueError
-from .base import Predictor
+from easycv.utils.misc import deprecated
+from .base import Predictor, PredictorV2
from .builder import PREDICTORS
+
+@PREDICTORS.register_module()
+class ClassificationPredictor(PredictorV2):
+ """Predictor for classification.
+ Args:
+ model_path (str): Path of model path.
+ config_file (Optional[str]): config file path for model and processor to init. Defaults to None.
+ batch_size (int): batch size for forward.
+ device (str): Support 'cuda' or 'cpu', if is None, detect device automatically.
+ save_results (bool): Whether to save predict results.
+ save_path (str): File path for saving results, only valid when `save_results` is True.
+ pipelines (list[dict]): Data pipeline configs.
+ topk (int): Return top-k results. Default: 1.
+ pil_input (bool): Whether to use PIL images as processor input. Set to True if the processor expects PIL input. Default: True.
+ label_map_path (str): File path of the label list, one class name per line.
+ """
+
+ def __init__(self,
+ model_path,
+ config_file=None,
+ batch_size=1,
+ device=None,
+ save_results=False,
+ save_path=None,
+ pipelines=None,
+ topk=1,
+ pil_input=True,
+ label_map_path=None,
+ *args,
+ **kwargs):
+ super(ClassificationPredictor, self).__init__(
+ model_path,
+ config_file=config_file,
+ batch_size=batch_size,
+ device=device,
+ save_results=save_results,
+ save_path=save_path,
+ pipelines=pipelines,
+ *args,
+ **kwargs)
+ self.topk = topk
+ self.pil_input = pil_input
+
+ # Adapt to torchvision transforms which process PIL inputs.
+ if self.pil_input:
+ self.INPUT_IMAGE_MODE = 'RGB'
+
+ if label_map_path is None:
+ class_list = self.cfg.get('CLASSES', [])
+ else:
+ with io.open(label_map_path, 'r') as f:
+ class_list = f.readlines()
+ self.label_map = [i.strip() for i in class_list]
+
+ def _load_input(self, input):
+ """Load image from file or numpy or PIL object.
+ Args:
+ input: File path or numpy or PIL object.
+ Returns:
+ {
+ 'filename': filename,
+ 'img': img,
+ 'img_shape': img_shape,
+ 'img_fields': ['img']
+ }
+ """
+ if self.pil_input:
+ results = {}
+ if isinstance(input, str):
+ img = Image.open(input)
+ if img.mode.upper() != self.INPUT_IMAGE_MODE.upper():
+ img = img.convert(self.INPUT_IMAGE_MODE.upper())
+ results['filename'] = input
+ else:
+ assert isinstance(input, ImageFile.ImageFile)
+ img = input
+ results['filename'] = None
+ results['img'] = img
+ results['img_shape'] = img.size
+ results['ori_shape'] = img.size
+ results['img_fields'] = ['img']
+ return results
+
+ return super()._load_input(input)
+
+ def postprocess(self, inputs, *args, **kwargs):
+ """Return top-k results."""
+ output_prob = inputs['prob'].data.cpu()
+ topk_class = torch.topk(output_prob, self.topk).indices.numpy()
+ output_prob = output_prob.numpy()
+ batch_results = []
+ batch_size = output_prob.shape[0]
+ for i in range(batch_size):
+ result = {'class': np.squeeze(topk_class[i]).tolist()}
+ if isinstance(result['class'], int):
+ result['class'] = [result['class']]
+
+ if len(self.label_map) > 0:
+ result['class_name'] = [
+ self.label_map[i] for i in result['class']
+ ]
+ result['class_probs'] = {}
+ for l_idx, l_name in enumerate(self.label_map):
+ result['class_probs'][l_name] = output_prob[i][l_idx]
+
+ batch_results.append(result)
+ return batch_results
+
+
try:
from easy_vision.python.inference.predictor import PredictorInterface
except:
from .interface import PredictorInterface
+@deprecated(reason='Please use ClassificationPredictor.')
@PREDICTORS.register_module()
class TorchClassifier(PredictorInterface):
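A minimal usage sketch for the new `ClassificationPredictor`, assuming an already exported checkpoint and a plain-text label map; both paths and the test image are placeholders, and the predictor is invoked directly since `PredictorV2` instances are callable:

```python
from easycv.predictors.classifier import ClassificationPredictor

predictor = ClassificationPredictor(
    'work_dir/cls/best_export.pth',      # hypothetical exported checkpoint
    topk=1,
    label_map_path='label_map.txt')      # hypothetical file, one class name per line

results = predictor(['demo.jpg'])        # batch inference over a list of images
print(results[0]['class'], results[0].get('class_name'))
```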
diff --git a/easycv/predictors/detector.py b/easycv/predictors/detector.py
index 7637ca67..38fd262f 100644
--- a/easycv/predictors/detector.py
+++ b/easycv/predictors/detector.py
@@ -5,9 +5,6 @@ from glob import glob
import numpy as np
import torch
-from mmcv.ops import RoIPool
-from mmcv.parallel import collate, scatter
-from torch.hub import load_state_dict_from_url
from torchvision.transforms import Compose
from easycv.apis.export import reparameterize_models
@@ -15,16 +12,12 @@ from easycv.core.visualization import imshow_bboxes
from easycv.datasets.registry import PIPELINES
from easycv.datasets.utils import replace_ImageToTensor
from easycv.file import io
-from easycv.file.utils import is_url_path, url_path_exists
-from easycv.framework.errors import TypeError
from easycv.models import build_model
from easycv.models.detection.utils import postprocess
from easycv.utils.checkpoint import load_checkpoint
from easycv.utils.config_tools import mmcv_config_fromfile
from easycv.utils.constant import CACHE_DIR
-from easycv.utils.logger import get_root_logger
-from easycv.utils.mmlab_utils import (dynamic_adapt_for_mmlab,
- remove_adapt_for_mmlab)
+from easycv.utils.misc import deprecated
from easycv.utils.registry import build_from_cfg
from .base import PredictorV2
from .builder import PREDICTORS
@@ -47,14 +40,16 @@ class DetectionPredictor(PredictorV2):
"""
def __init__(self,
- model_path=None,
+ model_path,
config_file=None,
batch_size=1,
device=None,
save_results=False,
save_path=None,
- mode='rgb',
- score_threshold=0.5):
+ pipelines=None,
+ score_threshold=0.5,
+ *arg,
+ **kwargs):
super(DetectionPredictor, self).__init__(
model_path,
config_file=config_file,
@@ -62,194 +57,55 @@ class DetectionPredictor(PredictorV2):
device=device,
save_results=save_results,
save_path=save_path,
- mode=mode,
+ pipelines=pipelines,
)
self.score_thresh = score_threshold
+ self.CLASSES = self.cfg.get('CLASSES', None)
+
+ def build_processor(self):
+ if self.pipelines is not None:
+ pipelines = self.pipelines
+ elif self.cfg is None:
+ pipelines = []
+ else:
+ pipelines = self.cfg.get('test_pipeline', [])
+
+ # for batch inference
+ self.pipelines = replace_ImageToTensor(pipelines)
+
+ return super().build_processor()
+
+ def postprocess_single(self, inputs, *args, **kwargs):
+ if inputs['detection_scores'] is None or len(
+ inputs['detection_scores']) < 1:
+ return inputs
+
+ scores = inputs['detection_scores']
+ if scores is not None and self.score_thresh > 0:
+ keeped_ids = scores > self.score_thresh
+ inputs['detection_scores'] = inputs['detection_scores'][keeped_ids]
+ inputs['detection_boxes'] = inputs['detection_boxes'][keeped_ids]
+ inputs['detection_classes'] = inputs['detection_classes'][
+ keeped_ids]
+
+ class_names = []
+ for _, classes_id in enumerate(inputs['detection_classes']):
+ if classes_id is None:
+ class_names.append(None)
+ elif self.CLASSES is not None and len(self.CLASSES) > 0:
+ class_names.append(self.CLASSES[int(classes_id)])
+ else:
+ class_names.append(classes_id)
+
+ inputs['detection_class_names'] = class_names
- def postprocess(self, inputs, *args, **kwargs):
- for batch_index in range(self.batch_size):
- this_detection_scores = inputs['detection_scores'][batch_index]
- sel_ids = this_detection_scores > self.score_thresh
- inputs['detection_scores'][batch_index] = inputs[
- 'detection_scores'][batch_index][sel_ids]
- inputs['detection_boxes'][batch_index] = inputs['detection_boxes'][
- batch_index][sel_ids]
- inputs['detection_classes'][batch_index] = inputs[
- 'detection_classes'][batch_index][sel_ids]
- # TODO class label remapping
return inputs
-
-class DetrPredictor(PredictorInterface):
- """Inference image(s) with the detector.
- Args:
- model_path (str): checkpoint model and export model are shared.
- config_path (str): If config_path is specified, both checkpoint model and export model can be used; if config_path=None, the export model is used by default.
- """
-
- def __init__(self, model_path, config_path=None):
-
- self.model_path = model_path
-
- if config_path is not None:
- self.cfg = mmcv_config_fromfile(config_path)
- else:
- logger = get_root_logger()
- logger.warning('please use export model!')
- if is_url_path(self.model_path) and url_path_exists(
- self.model_path):
- checkpoint = load_state_dict_from_url(model_path)
- else:
- assert io.exists(
- self.model_path), f'{self.model_path} does not exists'
-
- with io.open(self.model_path, 'rb') as infile:
- checkpoint = torch.load(infile, map_location='cpu')
-
- assert 'meta' in checkpoint and 'config' in checkpoint[
- 'meta'], 'meta.config is missing from checkpoint'
-
- config_str = checkpoint['meta']['config']
- if isinstance(config_str, dict):
- config_str = json.dumps(config_str)
-
- # get config
- basename = os.path.basename(self.model_path)
- fname, _ = os.path.splitext(basename)
- self.local_config_file = os.path.join(CACHE_DIR,
- f'{fname}_config.json')
- if not os.path.exists(CACHE_DIR):
- os.makedirs(CACHE_DIR)
- with open(self.local_config_file, 'w') as ofile:
- ofile.write(config_str)
- self.cfg = mmcv_config_fromfile(self.local_config_file)
-
- # dynamic adapt mmdet models
- dynamic_adapt_for_mmlab(self.cfg)
-
- # build model
- self.model = build_model(self.cfg.model)
-
- self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
- map_location = 'cpu' if self.device == 'cpu' else 'cuda'
- self.ckpt = load_checkpoint(
- self.model, self.model_path, map_location=map_location)
-
- self.model.to(self.device)
- self.model.eval()
-
- self.CLASSES = self.cfg.CLASSES
-
- def predict(self, imgs):
- """
- Args:
- imgs (str/ndarray or list[str/ndarray] or tuple[str/ndarray]):
- Either image files or loaded images.
- Returns:
- If imgs is a list or tuple, the same length list type results
- will be returned, otherwise return the detection results directly.
- """
-
- if isinstance(imgs, (list, tuple)):
- is_batch = True
- else:
- imgs = [imgs]
- is_batch = False
-
- cfg = self.cfg
- device = next(self.model.parameters()).device # model device
-
- if isinstance(imgs[0], np.ndarray):
- cfg = cfg.copy()
- # set loading pipeline type
- cfg.data.val.pipeline.insert(0, dict(type='LoadImageFromWebcam'))
- else:
- cfg = cfg.copy()
- # set loading pipeline type
- cfg.data.val.pipeline.insert(
- 0,
- dict(
- type='LoadImageFromFile',
- file_client_args=dict(
- backend=('http' if imgs[0].startswith('http'
- ) else 'disk'))))
-
- cfg.data.val.pipeline = replace_ImageToTensor(cfg.data.val.pipeline)
-
- transforms = []
- for transform in cfg.data.val.pipeline:
- if 'img_scale' in transform:
- transform['img_scale'] = tuple(transform['img_scale'])
- if isinstance(transform, dict):
- transform = build_from_cfg(transform, PIPELINES)
- transforms.append(transform)
- elif callable(transform):
- transforms.append(transform)
- else:
- raise TypeError('transform must be callable or a dict')
- test_pipeline = Compose(transforms)
-
- datas = []
- for img in imgs:
- # prepare data
- if isinstance(img, np.ndarray):
- # directly add img
- data = dict(img=img)
- else:
- # add information into dict
- data = dict(img_info=dict(filename=img), img_prefix=None)
- # build the data pipeline
- data = test_pipeline(data)
- datas.append(data)
-
- data = collate(datas, samples_per_gpu=len(imgs))
- # just get the actual data from DataContainer
- data['img_metas'] = [
- img_metas.data[0] for img_metas in data['img_metas']
- ]
- data['img'] = [img.data[0] for img in data['img']]
- if next(self.model.parameters()).is_cuda:
- # scatter to specified GPU
- data = scatter(data, [device])[0]
- else:
- for m in self.model.modules():
- assert not isinstance(
- m, RoIPool
- ), 'CPU inference with RoIPool is not supported currently.'
-
- # forward the model
- with torch.no_grad():
- results = self.model(mode='test', **data)
-
- return results
-
- def visualize(self,
- img,
- results,
- score_thr=0.3,
- show=False,
- out_file=None):
- bboxes = results['detection_boxes'][0]
- scores = results['detection_scores'][0]
- labels = results['detection_classes'][0].tolist()
-
- # If self.CLASSES is not None, class_id will be converted to self.CLASSES for visualization,
- # otherwise the class_id will be displayed.
- # And don't try to modify the value in results, it may cause some bugs or even precision problems,
- # because `self.evaluate` will also use the results, refer to: https://github.com/alibaba/EasyCV/pull/67
-
- if self.CLASSES is not None and len(self.CLASSES) > 0:
- for i, classes_id in enumerate(labels):
- if classes_id is None:
- labels[i] = None
- else:
- labels[i] = self.CLASSES[int(classes_id)]
-
- if scores is not None and score_thr > 0:
- inds = scores > score_thr
- bboxes = bboxes[inds]
- labels = np.array(labels)[inds]
-
+ def visualize(self, img, results, show=False, out_file=None):
+ """Only support show one sample now."""
+ bboxes = results['detection_boxes']
+ labels = results['detection_class_names']
+ img = self._load_input(img)['img']
imshow_bboxes(
img,
bboxes,
@@ -263,6 +119,12 @@ class DetrPredictor(PredictorInterface):
out_file=out_file)
+@deprecated(reason='Please use DetectionPredictor.')
+@PREDICTORS.register_module()
+class DetrPredictor(DetectionPredictor):
+ """"""
+
+
@PREDICTORS.register_module()
class TorchYoloXPredictor(PredictorInterface):
diff --git a/easycv/predictors/face_keypoints_predictor.py b/easycv/predictors/face_keypoints_predictor.py
index 2c94f0a4..54b13424 100644
--- a/easycv/predictors/face_keypoints_predictor.py
+++ b/easycv/predictors/face_keypoints_predictor.py
@@ -25,6 +25,11 @@ class FaceKeypointsPredictor(PredictorV2):
Args:
model_path (str): Path of model path
config_file (str): config file path for model and processor to init. Defaults to None.
+ batch_size (int): batch size for forward.
+ device (str): Support 'cuda' or 'cpu', if is None, detect device automatically.
+ save_results (bool): Whether to save predict results.
+ save_path (str): File path for saving results, only valid when `save_results` is True.
+ pipelines (list[dict]): Data pipeline configs.
"""
def __init__(self,
@@ -34,7 +39,7 @@ class FaceKeypointsPredictor(PredictorV2):
device=None,
save_results=False,
save_path=None,
- mode='bgr'):
+ pipelines=None):
super(FaceKeypointsPredictor, self).__init__(
model_path,
config_file,
@@ -42,7 +47,7 @@ class FaceKeypointsPredictor(PredictorV2):
device=device,
save_results=save_results,
save_path=save_path,
- mode=mode)
+ pipelines=pipelines)
self.input_size = self.cfg.IMAGE_SIZE
self.point_number = self.cfg.POINT_NUMBER
diff --git a/easycv/predictors/hand_keypoints_predictor.py b/easycv/predictors/hand_keypoints_predictor.py
index 01d0b0ce..0e092488 100644
--- a/easycv/predictors/hand_keypoints_predictor.py
+++ b/easycv/predictors/hand_keypoints_predictor.py
@@ -25,9 +25,11 @@ class HandKeypointsPredictor(PredictorV2):
config_file: path or ``Config`` of config file
detection_model_config: dict of hand detection model predictor config,
example like ``dict(type="", model_path="", config_file="", ......)``
- batch_size: batch_size to infer
- save_results: bool
- save_path: path of result image
+ batch_size (int): batch size for forward.
+ device (str): Support 'cuda' or 'cpu', if is None, detect device automatically.
+ save_results (bool): Whether to save predict results.
+ save_path (str): File path for saving results, only valid when `save_results` is True.
+ pipelines (list[dict]): Data pipeline configs.
"""
def __init__(self,
@@ -38,7 +40,7 @@ class HandKeypointsPredictor(PredictorV2):
device=None,
save_results=False,
save_path=None,
- mode='rgb',
+ pipelines=None,
*args,
**kwargs):
super(HandKeypointsPredictor, self).__init__(
@@ -48,7 +50,7 @@ class HandKeypointsPredictor(PredictorV2):
device=device,
save_results=save_results,
save_path=save_path,
- mode=mode,
+ pipelines=pipelines,
*args,
**kwargs)
self.dataset_info = DatasetInfo(COCO_WHOLEBODY_HAND_DATASET_INFO)
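A construction sketch for `HandKeypointsPredictor` following the docstring's `detection_model_config` example above; every path and the score threshold below are hypothetical:

```python
from easycv.predictors.hand_keypoints_predictor import HandKeypointsPredictor

predictor = HandKeypointsPredictor(
    'hand_keypoints.pth',                     # hypothetical keypoint checkpoint
    config_file='hand_keypoints_config.py',   # hypothetical config file
    detection_model_config=dict(
        type='DetectionPredictor',
        model_path='hand_det.pth',
        config_file='hand_det_config.py',
        score_threshold=0.5),
    batch_size=1)

results = predictor(['hand.jpg'])             # hypothetical test image
```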
@@ -70,52 +72,48 @@ class HandKeypointsPredictor(PredictorV2):
}
}
"""
- image_paths = input['inputs']
- batch_data = []
+ image_path = input['inputs']
+ data_list = []
box_id = 0
- for batch_index, image_path in enumerate(image_paths):
- det_bbox_result = input['results']['detection_boxes'][batch_index]
- det_bbox_scores = input['results']['detection_scores'][batch_index]
- img = mmcv.imread(image_path, 'color', self.mode)
- for bbox, score in zip(det_bbox_result, det_bbox_scores):
- center, scale = _box2cs(self.cfg.data_cfg['image_size'], bbox)
- # prepare data
- data = {
- 'image_file':
- image_path,
- 'img':
- img,
- 'image_id':
- batch_index,
- 'center':
- center,
- 'scale':
- scale,
- 'bbox_score':
- score,
- 'bbox_id':
- box_id, # need to be assigned if batch_size > 1
- 'dataset':
- 'coco_wholebody_hand',
- 'joints_3d':
- np.zeros((self.cfg.data_cfg.num_joints, 3),
- dtype=np.float32),
- 'joints_3d_visible':
- np.zeros((self.cfg.data_cfg.num_joints, 3),
- dtype=np.float32),
- 'rotation':
- 0,
- 'flip_pairs':
- self.dataset_info.flip_pairs,
- 'ann_info': {
- 'image_size':
- np.array(self.cfg.data_cfg['image_size']),
- 'num_joints': self.cfg.data_cfg['num_joints']
- }
+ det_bbox_result = input['detection_boxes']
+ det_bbox_scores = input['detection_scores']
+ img = mmcv.imread(image_path, 'color', self.INPUT_IMAGE_MODE)
+ for bbox, score in zip(det_bbox_result, det_bbox_scores):
+ center, scale = _box2cs(self.cfg.data_cfg['image_size'], bbox)
+ # prepare data
+ data = {
+ 'image_file':
+ image_path,
+ 'img':
+ img,
+ 'image_id':
+ 0,
+ 'center':
+ center,
+ 'scale':
+ scale,
+ 'bbox_score':
+ score,
+ 'bbox_id':
+ box_id, # need to be assigned if batch_size > 1
+ 'dataset':
+ 'coco_wholebody_hand',
+ 'joints_3d':
+ np.zeros((self.cfg.data_cfg.num_joints, 3), dtype=np.float32),
+ 'joints_3d_visible':
+ np.zeros((self.cfg.data_cfg.num_joints, 3), dtype=np.float32),
+ 'rotation':
+ 0,
+ 'flip_pairs':
+ self.dataset_info.flip_pairs,
+ 'ann_info': {
+ 'image_size': np.array(self.cfg.data_cfg['image_size']),
+ 'num_joints': self.cfg.data_cfg['num_joints']
}
- batch_data.append(data)
- box_id += 1
- return batch_data
+ }
+ data_list.append(data)
+ box_id += 1
+ return data_list
def preprocess_single(self, input):
results = []
@@ -128,8 +126,11 @@ class HandKeypointsPredictor(PredictorV2):
"""Process all inputs list. And collate to batch and put to target device.
If you need custom ops to load or process a batch samples, you need to reimplement it.
"""
+ # hand det and return source image
+ det_results = self.detection_predictor(inputs, keep_inputs=True)
+
batch_outputs = []
- for i in inputs:
+ for i in det_results:
for res in self.preprocess_single(i, *args, **kwargs):
batch_outputs.append(res)
batch_outputs = self._collate_fn(batch_outputs)
@@ -137,37 +138,25 @@ class HandKeypointsPredictor(PredictorV2):
return batch_outputs
def postprocess(self, inputs, *args, **kwargs):
- output = {}
- output['keypoints'] = inputs['preds']
- output['boxes'] = inputs['boxes']
- for i, bbox in enumerate(output['boxes']):
+ keypoints = inputs['preds']
+ boxes = inputs['boxes']
+ for i, bbox in enumerate(boxes):
center, scale = bbox[:2], bbox[2:4]
- output['boxes'][i][:4] = bbox_cs2xyxy(center, scale)
- output['boxes'] = output['boxes'][:, :4]
- return output
-
- def __call__(self, inputs, keep_inputs=False):
- if isinstance(inputs, str):
- inputs = [inputs]
-
- results_list = []
- for i in range(0, len(inputs), self.batch_size):
- batch = inputs[i:max(len(inputs) - 1, i + self.batch_size)]
- # hand det and return source image
- det_results = self.detection_predictor(batch, keep_inputs=True)
- # hand keypoints
- batch_outputs = self.preprocess(det_results)
- batch_outputs = self.forward(batch_outputs)
- results = self.postprocess(batch_outputs)
- if keep_inputs:
- results = {'inputs': batch, 'results': results}
- # if dump, the outputs will not added to the return value to prevent taking up too much memory
- if self.save_results:
- self.dump([results], self.save_path, mode='ab+')
- else:
- results_list.append(results)
-
- return results_list
+ boxes[i][:4] = bbox_cs2xyxy(center, scale)
+ boxes = boxes[:, :4]
+ # TODO: support multi bboxes for a single sample
+ assert len(keypoints.shape) == 3
+ assert len(boxes.shape) == 2
+ batch_outputs = []
+ batch_size = keypoints.shape[0]
+ keypoints = np.split(keypoints, batch_size)
+ boxes = np.split(boxes, batch_size)
+ for i in range(batch_size):
+ batch_outputs.append({
+ 'keypoints': keypoints[i],
+ 'boxes': boxes[i]
+ })
+ return batch_outputs
def show_result(self,
image_path,
diff --git a/easycv/predictors/segmentation.py b/easycv/predictors/segmentation.py
index 6916817b..51365653 100644
--- a/easycv/predictors/segmentation.py
+++ b/easycv/predictors/segmentation.py
@@ -5,22 +5,25 @@ import numpy as np
import torch
from matplotlib.collections import PatchCollection
from matplotlib.patches import Polygon
-from torchvision.transforms import Compose
from easycv.core.visualization.image import imshow_bboxes
-from easycv.datasets.registry import PIPELINES
-from easycv.file import io
-from easycv.models import build_model
from easycv.predictors.builder import PREDICTORS
-from easycv.predictors.interface import PredictorInterface
-from easycv.utils.checkpoint import load_checkpoint
-from easycv.utils.config_tools import mmcv_config_fromfile
-from easycv.utils.registry import build_from_cfg
from .base import PredictorV2
@PREDICTORS.register_module()
class SegmentationPredictor(PredictorV2):
+ """Predictor for Segmentation.
+
+ Args:
+        model_path (str): Path of the model file.
+        config_file (Optional[str]): Config file path for the model and processor to init. Defaults to None.
+        batch_size (int): Batch size for forward.
+        device (str): Supports 'cuda' or 'cpu'; if None, the device is detected automatically.
+        save_results (bool): Whether to save prediction results.
+ save_path (str): File path for saving results, only valid when `save_results` is True.
+ pipelines (list[dict]): Data pipeline configs.
+ """
def __init__(self,
model_path,
@@ -28,20 +31,21 @@ class SegmentationPredictor(PredictorV2):
batch_size=1,
device=None,
save_results=False,
- save_path=None):
- """Predict pipeline for Segmentation
+ save_path=None,
+ pipelines=None,
+ *args,
+ **kwargs):
- Args:
- model_path (str): Path of model path
- config_file (str): config file path for model and processor to init. Defaults to None.
- """
super(SegmentationPredictor, self).__init__(
model_path,
config_file,
batch_size=batch_size,
device=device,
save_results=save_results,
- save_path=save_path)
+ save_path=save_path,
+ pipelines=pipelines,
+ *args,
+ **kwargs)
self.CLASSES = self.cfg.CLASSES
self.PALETTE = self.cfg.PALETTE
@@ -123,71 +127,61 @@ class SegmentationPredictor(PredictorV2):
@PREDICTORS.register_module()
-class Mask2formerPredictor(PredictorInterface):
+class Mask2formerPredictor(SegmentationPredictor):
+ """Predictor for Mask2former.
- def __init__(self, model_path, model_config=None):
- """init model
+ Args:
+        model_path (str): Path of the model file.
+        config_file (Optional[str]): Config file path for the model and processor to init. Defaults to None.
+        batch_size (int): Batch size for forward.
+        device (str): Supports 'cuda' or 'cpu'; if None, the device is detected automatically.
+        save_results (bool): Whether to save prediction results.
+ save_path (str): File path for saving results, only valid when `save_results` is True.
+ pipelines (list[dict]): Data pipeline configs.
+ """
- Args:
- model_path (str): Path of model path
- model_config (config, optional): config string for model to init. Defaults to None.
+ def __init__(self,
+ model_path,
+ config_file=None,
+ batch_size=1,
+ device=None,
+ save_results=False,
+ save_path=None,
+ pipelines=None,
+ task_mode='panoptic',
+ *args,
+ **kwargs):
+ super(Mask2formerPredictor, self).__init__(
+ model_path,
+ config_file,
+ batch_size=batch_size,
+ device=device,
+ save_results=save_results,
+ save_path=save_path,
+ pipelines=pipelines,
+ *args,
+ **kwargs)
+ self.task_mode = task_mode
+
+ def forward(self, inputs):
+ """Model forward.
"""
- self.model_path = model_path
+ with torch.no_grad():
+ outputs = self.model(**inputs, mode='test', encode=False)
+ return outputs
- self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
- self.model = None
- with io.open(self.model_path, 'rb') as infile:
- checkpoint = torch.load(infile, map_location='cpu')
-
- assert 'meta' in checkpoint and 'config' in checkpoint[
- 'meta'], 'meta.config is missing from checkpoint'
-
- self.cfg = checkpoint['meta']['config']
- self.classes = len(self.cfg.PALETTE)
- self.class_name = self.cfg.CLASSES
- # build model
- self.model = build_model(self.cfg.model)
-
- self.ckpt = load_checkpoint(
- self.model, self.model_path, map_location=self.device)
- self.model.to(self.device)
- self.model.eval()
-
- # build pipeline
- test_pipeline = self.cfg.test_pipeline
- pipeline = [build_from_cfg(p, PIPELINES) for p in test_pipeline]
- self.pipeline = Compose(pipeline)
-
- def predict(self, input_data_list, mode='panoptic'):
- """
- Args:
- input_data_list: a list of numpy array(in rgb order), each array is a sample
- to be predicted
- """
- output_list = []
- for idx, img in enumerate(input_data_list):
- output = {}
- if not isinstance(img, np.ndarray):
- img = np.asarray(img)
- data_dict = {'img': img}
- ori_shape = img.shape
- data_dict = self.pipeline(data_dict)
- img = data_dict['img']
- img[0] = torch.unsqueeze(img[0], 0).to(self.device)
- img_metas = [[
- img_meta._data for img_meta in data_dict['img_metas']
- ]]
- img_metas[0][0]['ori_shape'] = ori_shape
- res = self.model.forward_test(img, img_metas, encode=False)
- if mode == 'panoptic':
- output['pan'] = res['pan_results'][0]
- elif mode == 'instance':
- output['segms'] = res['detection_masks'][0]
- output['bboxes'] = res['detection_boxes'][0]
- output['scores'] = res['detection_scores'][0]
- output['labels'] = res['detection_classes'][0]
- output_list.append(output)
- return output_list
+ def postprocess(self, inputs):
+ output = {}
+ if self.task_mode == 'panoptic':
+ output['pan'] = inputs['pan_results'][0]
+ elif self.task_mode == 'instance':
+ output['segms'] = inputs['detection_masks'][0]
+ output['bboxes'] = inputs['detection_boxes'][0]
+ output['scores'] = inputs['detection_scores'][0]
+ output['labels'] = inputs['detection_classes'][0]
+ else:
+            raise ValueError(f'Unsupported task_mode: {self.task_mode}')
+ return output
def show_panoptic(self, img, pan_mask):
pan_label = np.unique(pan_mask)
@@ -214,147 +208,6 @@ class Mask2formerPredictor(PredictorInterface):
return instance_result
-@PREDICTORS.register_module()
-class SegFormerPredictor(PredictorInterface):
-
- def __init__(self, model_path, model_config):
- """init model
-
- Args:
- model_path (str): Path of model path
- model_config (config): config string for model to init. Defaults to None.
- """
- self.model_path = model_path
-
- self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
- self.model = None
- with io.open(self.model_path, 'rb') as infile:
- checkpoint = torch.load(infile, map_location='cpu')
-
- self.cfg = mmcv_config_fromfile(model_config)
- self.CLASSES = self.cfg.CLASSES
- self.PALETTE = self.cfg.PALETTE
- # build model
- self.model = build_model(self.cfg.model)
-
- self.ckpt = load_checkpoint(
- self.model, self.model_path, map_location=self.device)
- self.model.to(self.device)
- self.model.eval()
-
- # build pipeline
- test_pipeline = self.cfg.test_pipeline
- pipeline = [build_from_cfg(p, PIPELINES) for p in test_pipeline]
- self.pipeline = Compose(pipeline)
-
- def predict(self, input_data_list):
- """
- using session run predict a number of samples using batch_size
-
- Args:
- input_data_list: a list of numpy array(in rgb order), each array is a sample
- to be predicted
- use a fixed number if you do not want to adjust batch_size in runtime
- """
- output_list = []
- for idx, img in enumerate(input_data_list):
- if type(img) is not np.ndarray:
- img = np.asarray(img)
-
- ori_img_shape = img.shape[:2]
-
- data_dict = {'img': img}
- data_dict['ori_shape'] = ori_img_shape
- data_dict = self.pipeline(data_dict)
- img = data_dict['img']
- img = torch.unsqueeze(img[0], 0).to(self.device)
- data_dict.pop('img')
-
- with torch.no_grad():
- out = self.model([img],
- mode='test',
- img_metas=[[data_dict['img_metas'][0]._data]])
-
- output_list.append(out)
-
- return output_list
-
- def show_result(self,
- img,
- result,
- palette=None,
- win_name='',
- show=False,
- wait_time=0,
- out_file=None,
- opacity=0.5):
- """Draw `result` over `img`.
-
- Args:
- img (str or Tensor): The image to be displayed.
- result (Tensor): The semantic segmentation results to draw over
- `img`.
- palette (list[list[int]]] | np.ndarray | None): The palette of
- segmentation map. If None is given, random palette will be
- generated. Default: None
- win_name (str): The window name.
- wait_time (int): Value of waitKey param.
- Default: 0.
- show (bool): Whether to show the image.
- Default: False.
- out_file (str or None): The filename to write the image.
- Default: None.
- opacity(float): Opacity of painted segmentation map.
- Default 0.5.
- Must be in (0, 1] range.
- Returns:
- img (Tensor): Only if not `show` or `out_file`
- """
-
- img = mmcv.imread(img)
- img = img.copy()
- seg = result[0]
- if palette is None:
- if self.PALETTE is None:
- # Get random state before set seed,
- # and restore random state later.
- # It will prevent loss of randomness, as the palette
- # may be different in each iteration if not specified.
- # See: https://github.com/open-mmlab/mmdetection/issues/5844
- state = np.random.get_state()
- np.random.seed(42)
- # random palette
- palette = np.random.randint(
- 0, 255, size=(len(self.CLASSES), 3))
- np.random.set_state(state)
- else:
- palette = self.PALETTE
- palette = np.array(palette)
- assert palette.shape[0] == len(self.CLASSES)
- assert palette.shape[1] == 3
- assert len(palette.shape) == 2
- assert 0 < opacity <= 1.0
- color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8)
- for label, color in enumerate(palette):
- color_seg[seg == label, :] = color
- # convert to BGR
- color_seg = color_seg[..., ::-1]
-
- img = img * (1 - opacity) + color_seg * opacity
- img = img.astype(np.uint8)
- # if out_file specified, do not show image in window
- if out_file is not None:
- show = False
-
- if show:
- mmcv.imshow(img, win_name, wait_time)
- if out_file is not None:
- mmcv.imwrite(img, out_file)
-
- if not (show or out_file):
- return img
-
-
def _get_bias_color(base, max_dist=30):
"""Get different colors for each masks.
diff --git a/easycv/utils/checkpoint.py b/easycv/utils/checkpoint.py
index c583d9a0..4c987c83 100644
--- a/easycv/utils/checkpoint.py
+++ b/easycv/utils/checkpoint.py
@@ -1,4 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
+import logging
import os
import torch
@@ -8,6 +9,7 @@ from mmcv.runner.checkpoint import get_state_dict, weights_to_cpu
from torch.optim import Optimizer
from easycv.file import io
+from easycv.file.utils import is_url_path
from easycv.framework.errors import TypeError
from easycv.utils.constant import CACHE_DIR
@@ -32,28 +34,40 @@ def load_checkpoint(model,
Returns:
dict or OrderedDict: The loaded checkpoint.
"""
- if not filename.startswith('oss://'):
- return mmcv_load_checkpoint(
- model,
- filename,
- map_location=map_location,
- strict=strict,
- logger=logger)
- else:
+ if filename.startswith('oss://'):
_, fname = os.path.split(filename)
cache_file = os.path.join(CACHE_DIR, fname)
+ if not os.path.exists(CACHE_DIR):
+ os.makedirs(CACHE_DIR)
if not os.path.exists(cache_file):
- print(f'download checkpoint from {filename} to {cache_file}')
+ logging.info(
+ f'download checkpoint from {filename} to {cache_file}')
io.copy(filename, cache_file)
if torch.distributed.is_available(
) and torch.distributed.is_initialized():
torch.distributed.barrier()
- return mmcv_load_checkpoint(
- model,
- cache_file,
- map_location=map_location,
- strict=strict,
- logger=logger)
+ filename = cache_file
+ elif is_url_path(filename):
+ from torch.hub import urlparse, download_url_to_file
+ parts = urlparse(filename)
+ base_name = os.path.basename(parts.path)
+ cache_file = os.path.join(CACHE_DIR, base_name)
+ if not os.path.exists(CACHE_DIR):
+ os.makedirs(CACHE_DIR)
+ if not os.path.exists(cache_file):
+ logging.info(
+ f'download checkpoint from {filename} to {cache_file}')
+ download_url_to_file(filename, cache_file)
+ if torch.distributed.is_available(
+ ) and torch.distributed.is_initialized():
+ torch.distributed.barrier()
+ filename = cache_file
+ return mmcv_load_checkpoint(
+ model,
+ filename,
+ map_location=map_location,
+ strict=strict,
+ logger=logger)
def save_checkpoint(model, filename, optimizer=None, meta=None):
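
With the change above, load_checkpoint caches both oss:// and plain http(s) checkpoints under CACHE_DIR before handing the local file to mmcv_load_checkpoint. A hedged sketch of a call (the URL and model are placeholders; the keyword arguments mirror mmcv's load_checkpoint):

    import torch.nn as nn
    from easycv.utils.checkpoint import load_checkpoint

    model = nn.Linear(8, 2)  # stand-in module; use a real EasyCV model in practice
    ckpt = load_checkpoint(
        model,
        'https://example.com/checkpoints/model.pth',  # placeholder URL, cached to CACHE_DIR
        map_location='cpu',
        strict=False)
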
diff --git a/easycv/utils/constant.py b/easycv/utils/constant.py
index 981a8bbb..87afc813 100644
--- a/easycv/utils/constant.py
+++ b/easycv/utils/constant.py
@@ -1,4 +1,6 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
-CACHE_DIR = '.easycv_cache'
+import os
+
+CACHE_DIR = os.path.expanduser('~/.cache/easycv/')
MAX_READ_IMAGE_TRY_TIMES = 10
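
Since CACHE_DIR now expands to an absolute per-user directory instead of the relative '.easycv_cache', downstream code can create and reuse it regardless of the working directory; a small check:

    import os
    from easycv.utils.constant import CACHE_DIR

    print(CACHE_DIR)                       # e.g. /home/<user>/.cache/easycv/
    os.makedirs(CACHE_DIR, exist_ok=True)  # safe to call from any working directory
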
diff --git a/easycv/utils/misc.py b/easycv/utils/misc.py
index 8e544b96..cce21293 100644
--- a/easycv/utils/misc.py
+++ b/easycv/utils/misc.py
@@ -1,12 +1,12 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
+import functools
+import inspect
import logging
+import warnings
from functools import partial
import mmcv
import numpy as np
-from six.moves import map, zip
-
-from easycv.models.backbones.repvgg_yolox_backbone import RepVGGBlock
def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True):
@@ -79,6 +79,8 @@ def reparameterize_models(model):
Args:
model: nn.Module
"""
+ from easycv.models.backbones.repvgg_yolox_backbone import RepVGGBlock
+
reparameterize_count = 0
for layer in model.modules():
if isinstance(layer, RepVGGBlock):
@@ -89,3 +91,31 @@ def reparameterize_models(model):
.format(reparameterize_count))
print('reparam:', reparameterize_count)
return model
+
+
+def deprecated(reason):
+    """Decorator to mark functions or classes as deprecated.
+
+    Calling the decorated object emits a DeprecationWarning with the
+    given reason.
+    """
+
+ def decorator(func1):
+ if inspect.isclass(func1):
+ fmt1 = 'Call to deprecated class {name} ({reason}).'
+ else:
+ fmt1 = 'Call to deprecated function {name} ({reason}).'
+
+ @functools.wraps(func1)
+ def new_func1(*args, **kwargs):
+ warnings.simplefilter('always', DeprecationWarning)
+ warnings.warn(
+ fmt1.format(name=func1.__name__, reason=reason),
+ category=DeprecationWarning,
+ stacklevel=2)
+ warnings.simplefilter('default', DeprecationWarning)
+ return func1(*args, **kwargs)
+
+ return new_func1
+
+ return decorator
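
A short usage example for the new deprecated decorator (the function name and reason are illustrative):

    from easycv.utils.misc import deprecated

    @deprecated(reason='use DetectionPredictor instead')
    def old_predict(x):
        return x

    old_predict(1)  # warns: Call to deprecated function old_predict (use DetectionPredictor instead).
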
diff --git a/tests/datasets/segmentation/test_seg_raw_dataset.py b/tests/datasets/segmentation/test_seg_raw_dataset.py
index 9ad7d489..b9e5a628 100644
--- a/tests/datasets/segmentation/test_seg_raw_dataset.py
+++ b/tests/datasets/segmentation/test_seg_raw_dataset.py
@@ -7,9 +7,7 @@ from tests.ut_config import (IMG_NORM_CFG_255, SEG_DATA_SMALL_RAW_LOCAL,
from easycv.core.evaluation.builder import build_evaluator
from easycv.datasets.builder import build_datasource
-from easycv.datasets.segmentation.data_sources.raw import SegSourceRaw
from easycv.datasets.segmentation.raw import SegDataset
-from easycv.file import io
class SegDatasetTest(unittest.TestCase):
diff --git a/tests/predictors/test_classifier.py b/tests/predictors/test_classifier.py
index 8aef4778..c546dfe6 100644
--- a/tests/predictors/test_classifier.py
+++ b/tests/predictors/test_classifier.py
@@ -8,14 +8,57 @@ import unittest
import cv2
import torch
-
-from easycv.predictors.classifier import TorchClassifier
-
+from easycv.predictors.builder import build_predictor
from easycv.utils.test_util import clean_up, get_tmp_dir
+from easycv.utils.config_tools import mmcv_config_fromfile
from tests.ut_config import (PRETRAINED_MODEL_RESNET50_WITHOUTHEAD,
IMAGENET_LABEL_TXT, TEST_IMAGES_DIR)
+class ClassificationPredictorTest(unittest.TestCase):
+
+ def setUp(self):
+ print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
+
+ def test_single(self):
+ checkpoint = PRETRAINED_MODEL_RESNET50_WITHOUTHEAD
+ config_file = 'configs/classification/imagenet/resnet/imagenet_resnet50_jpg.py'
+ cfg = mmcv_config_fromfile(config_file)
+ predict_op = build_predictor(
+ dict(
+ **cfg.predict,
+ model_path=checkpoint,
+ config_file=config_file,
+ label_map_path=IMAGENET_LABEL_TXT))
+ img_path = os.path.join(TEST_IMAGES_DIR, 'catb.jpg')
+
+ results = predict_op([img_path])[0]
+ self.assertListEqual(results['class'], [283])
+ self.assertListEqual(results['class_name'], ['"Persian cat",'])
+ self.assertEqual(len(results['class_probs']), 1000)
+
+ def test_batch(self):
+ checkpoint = PRETRAINED_MODEL_RESNET50_WITHOUTHEAD
+ config_file = 'configs/classification/imagenet/resnet/imagenet_resnet50_jpg.py'
+ cfg = mmcv_config_fromfile(config_file)
+ predict_op = build_predictor(
+ dict(
+ **cfg.predict,
+ model_path=checkpoint,
+ config_file=config_file,
+ label_map_path=IMAGENET_LABEL_TXT,
+ batch_size=3))
+ img_path = os.path.join(TEST_IMAGES_DIR, 'catb.jpg')
+
+ num_imgs = 4
+ results = predict_op([img_path] * num_imgs)
+ self.assertEqual(len(results), num_imgs)
+ for res in results:
+ self.assertListEqual(res['class'], [283])
+ self.assertListEqual(res['class_name'], ['"Persian cat",'])
+ self.assertEqual(len(res['class_probs']), 1000)
+
+
class TorchClassifierTest(unittest.TestCase):
def setUp(self):
@@ -62,6 +105,8 @@ class TorchClassifierTest(unittest.TestCase):
output_ckpt = f'{self.tmp_dir}/export.pth'
torch.save(output_dict, output_ckpt)
+ from easycv.predictors.classifier import TorchClassifier
+
fe = TorchClassifier(
output_ckpt, topk=topk, label_map_path=IMAGENET_LABEL_TXT)
diff --git a/tests/predictors/test_detector.py b/tests/predictors/test_detector.py
index c3be2ed6..1b160a01 100644
--- a/tests/predictors/test_detector.py
+++ b/tests/predictors/test_detector.py
@@ -4,11 +4,11 @@ isort:skip_file
"""
import os
import unittest
-
+import tempfile
import numpy as np
from PIL import Image
-from easycv.predictors.detector import TorchYoloXPredictor, DetrPredictor
+from easycv.predictors.detector import TorchYoloXPredictor, DetectionPredictor
from tests.ut_config import (PRETRAINED_MODEL_YOLOXS_EXPORT,
PRETRAINED_MODEL_YOLOXS_EXPORT_OLD,
PRETRAINED_MODEL_YOLOXS_NOPRE_NOTRT_JIT,
@@ -154,25 +154,18 @@ class DetectorTest(unittest.TestCase):
[510.37033, 268.4982, 527.67017, 273.04935]]),
decimal=1)
- def test_vitdet_detector(self):
- model_path = 'https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/vitdet/vit_base/epoch_100_export.pth'
- img = 'https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/demo/demo.jpg'
- out_file = './result.jpg'
- vitdet = DetrPredictor(model_path)
- output = vitdet.predict(img)
- vitdet.visualize(img, output, out_file=out_file)
-
+ def _detection_detector_assert(self, output):
self.assertIn('detection_boxes', output)
self.assertIn('detection_scores', output)
self.assertIn('detection_classes', output)
self.assertIn('detection_masks', output)
self.assertIn('img_metas', output)
- self.assertEqual(len(output['detection_boxes'][0]), 33)
- self.assertEqual(len(output['detection_scores'][0]), 33)
- self.assertEqual(len(output['detection_classes'][0]), 33)
+ self.assertEqual(len(output['detection_boxes']), 33)
+ self.assertEqual(len(output['detection_scores']), 33)
+ self.assertEqual(len(output['detection_classes']), 33)
self.assertListEqual(
- output['detection_classes'][0].tolist(),
+ output['detection_classes'].tolist(),
np.array([
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 7, 7, 13, 13, 13, 56
@@ -180,7 +173,7 @@ class DetectorTest(unittest.TestCase):
dtype=np.int32).tolist())
assert_array_almost_equal(
- output['detection_scores'][0],
+ output['detection_scores'],
np.array([
0.9975854158401489, 0.9965696334838867, 0.9922919869422913,
0.9833580851554871, 0.983080267906189, 0.970454752445221,
@@ -198,7 +191,7 @@ class DetectorTest(unittest.TestCase):
decimal=2)
assert_array_almost_equal(
- output['detection_boxes'][0],
+ output['detection_boxes'],
np.array([[
294.22674560546875, 116.6078109741211, 379.4328918457031,
150.14097595214844
@@ -333,6 +326,32 @@ class DetectorTest(unittest.TestCase):
]]),
decimal=1)
+ def test_detection_detector_single(self):
+ model_path = 'https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/vitdet/vit_base/epoch_100_export.pth'
+ img = 'https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/demo/demo.jpg'
+ vitdet = DetectionPredictor(model_path, score_threshold=0.0)
+ output = vitdet(img)
+ output = output[0]
+ with tempfile.NamedTemporaryFile(suffix='.jpg') as tmp_file:
+ tmp_save_path = tmp_file.name
+ vitdet.visualize(img, output, out_file=tmp_save_path)
+ self._detection_detector_assert(output)
+
+ def test_detection_detector_batch(self):
+ model_path = 'https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/detection/vitdet/vit_base/epoch_100_export.pth'
+ img = 'https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/demo/demo.jpg'
+ vitdet = DetectionPredictor(
+ model_path, score_threshold=0.0, batch_size=2)
+ num_samples = 3
+ images = [img] * num_samples
+ outputs = vitdet(images)
+ self.assertEqual(len(outputs), num_samples)
+ for output in outputs:
+ with tempfile.NamedTemporaryFile(suffix='.jpg') as tmp_file:
+ tmp_save_path = tmp_file.name
+ vitdet.visualize(img, output, out_file=tmp_save_path)
+ self._detection_detector_assert(output)
+
if __name__ == '__main__':
unittest.main()
diff --git a/tests/predictors/test_detector_blade.py b/tests/predictors/test_detector_blade.py
index 3f3aae65..143425a3 100644
--- a/tests/predictors/test_detector_blade.py
+++ b/tests/predictors/test_detector_blade.py
@@ -3,22 +3,14 @@
isort:skip_file
"""
import os
-import tempfile
import unittest
-import cv2
import numpy as np
from PIL import Image
from easycv.predictors.detector import TorchYoloXPredictor
-from tests.ut_config import (PRETRAINED_MODEL_YOLOXS_EXPORT,
- PRETRAINED_MODEL_YOLOXS_NOPRE_NOTRT_JIT,
- PRETRAINED_MODEL_YOLOXS_PRE_NOTRT_JIT,
- PRETRAINED_MODEL_YOLOXS_NOPRE_NOTRT_BLADE,
+from tests.ut_config import (PRETRAINED_MODEL_YOLOXS_NOPRE_NOTRT_BLADE,
PRETRAINED_MODEL_YOLOXS_PRE_NOTRT_BLADE,
DET_DATA_SMALL_COCO_LOCAL)
-from easycv.utils.test_util import benchmark
-import logging
-import pandas as pd
import torch
from numpy.testing import assert_array_almost_equal
@@ -37,7 +29,6 @@ class DetectorTest(unittest.TestCase):
input_data_list = [np.asarray(Image.open(img))]
blade_path = PRETRAINED_MODEL_YOLOXS_NOPRE_NOTRT_BLADE
- # blade_path = '/home/zouxinyi.zxy/easycv_nfs/pretrained_models/detection/infer_yolox/debug_blade.pt.blade'
predictor_blade = TorchYoloXPredictor(
model_path=blade_path, score_thresh=0.5)
diff --git a/tests/predictors/test_face_keypoints_predictor.py b/tests/predictors/test_face_keypoints_predictor.py
index 67482e51..3f62319a 100644
--- a/tests/predictors/test_face_keypoints_predictor.py
+++ b/tests/predictors/test_face_keypoints_predictor.py
@@ -19,7 +19,7 @@ class FaceKeypointsPredictorWithoutDetectorTest(unittest.TestCase):
def test_single(self):
predict_pipeline = FaceKeypointsPredictor(
model_path=self.model_path, config_file=self.model_config_path)
- output = predict_pipeline(self.image_path)[0][0]
+ output = predict_pipeline(self.image_path)[0]
output_keypoints = output['point']
output_pose = output['pose']
img = cv2.imread(self.image_path)
@@ -38,18 +38,10 @@ class FaceKeypointsPredictorWithoutDetectorTest(unittest.TestCase):
total_samples = 3
output = predict_pipeline([self.image_path] * total_samples)
- self.assertEqual(len(output), 2)
- self.assertEqual(len(output[0]), 2)
- self.assertEqual(len(output[1]), 1)
- self.assertEqual(output[0][0]['point'].shape[0], 106)
- self.assertEqual(output[0][0]['point'].shape[1], 2)
- self.assertEqual(output[0][0]['pose'].shape[0], 3)
- self.assertEqual(output[0][1]['point'].shape[0], 106)
- self.assertEqual(output[0][1]['point'].shape[1], 2)
- self.assertEqual(output[0][1]['pose'].shape[0], 3)
- self.assertEqual(output[1][0]['point'].shape[0], 106)
- self.assertEqual(output[1][0]['point'].shape[1], 2)
- self.assertEqual(output[1][0]['pose'].shape[0], 3)
+ self.assertEqual(len(output), total_samples)
+ for out in output:
+ self.assertEqual(out['point'].shape, (106, 2))
+ self.assertEqual(out['pose'].shape, (3, ))
if __name__ == '__main__':
diff --git a/tests/predictors/test_hand_keypoints_predictor.py b/tests/predictors/test_hand_keypoints_predictor.py
index b2bca4cf..4a325098 100644
--- a/tests/predictors/test_hand_keypoints_predictor.py
+++ b/tests/predictors/test_hand_keypoints_predictor.py
@@ -39,6 +39,37 @@ class HandKeypointsPredictorTest(unittest.TestCase):
self.assertEqual(keypoints.shape[1], 21)
self.assertEqual(keypoints.shape[2], 3)
+ def test_batch(self):
+ config = mmcv_config_fromfile(self.model_config_path)
+ predict_pipeline = HandKeypointsPredictor(
+ model_path=self.model_path,
+ config_file=config,
+ batch_size=2,
+ detection_predictor_config=dict(
+ type='DetectionPredictor',
+ model_path=MM_DEFAULT_HAND_DETECTION_SSDLITE_MODEL_PATH,
+ config_file=MM_DEFAULT_HAND_DETECTION_SSDLITE_CONFIG_FILE,
+ score_threshold=0.5))
+
+ num_samples = 4
+ outputs = predict_pipeline(
+ [self.image_path] * num_samples, keep_inputs=True)
+ base_keypoints = outputs[0]['keypoints']
+ base_boxes = outputs[0]['boxes']
+ for output in outputs:
+ keypoints = output['keypoints']
+ boxes = output['boxes']
+ image_show = predict_pipeline.show_result(
+ self.image_path,
+ keypoints,
+ boxes,
+ save_path=self.save_image_path)
+ self.assertEqual(keypoints.shape, (1, 21, 3))
+ self.assertEqual(boxes.shape, (1, 4))
+ self.assertListEqual(keypoints.tolist(), base_keypoints.tolist())
+ self.assertListEqual(boxes.tolist(), base_boxes.tolist())
+ self.assertEqual(output['inputs'], self.image_path)
+
if __name__ == '__main__':
unittest.main()
diff --git a/tests/predictors/test_segmentation.py b/tests/predictors/test_segmentation.py
index e84a3e1a..5b36a2fb 100644
--- a/tests/predictors/test_segmentation.py
+++ b/tests/predictors/test_segmentation.py
@@ -8,6 +8,7 @@ import unittest
import numpy as np
from PIL import Image
from tests.ut_config import (MODEL_CONFIG_SEGFORMER,
+ PRETRAINED_MODEL_MASK2FORMER_DIR,
PRETRAINED_MODEL_SEGFORMER, TEST_IMAGES_DIR)
from easycv.predictors.segmentation import SegmentationPredictor
@@ -31,14 +32,14 @@ class SegmentationPredictorTest(unittest.TestCase):
outputs = predict_pipeline(img_path, keep_inputs=True)
self.assertEqual(len(outputs), 1)
- self.assertEqual(outputs[0]['inputs'], [img_path])
+ results = outputs[0]
+ self.assertEqual(results['inputs'], img_path)
- results = outputs[0]['results']
self.assertListEqual(
- list(img.shape)[:2], list(results['seg_pred'][0].shape))
- self.assertListEqual(results['seg_pred'][0][1, :10].tolist(),
+ list(img.shape)[:2], list(results['seg_pred'].shape))
+ self.assertListEqual(results['seg_pred'][1, :10].tolist(),
[161 for i in range(10)])
- self.assertListEqual(results['seg_pred'][0][-1, -10:].tolist(),
+ self.assertListEqual(results['seg_pred'][-1, -10:].tolist(),
[133 for i in range(10)])
def test_batch(self):
@@ -56,19 +57,15 @@ class SegmentationPredictorTest(unittest.TestCase):
total_samples = 3
outputs = predict_pipeline(
[img_path] * total_samples, keep_inputs=True)
- self.assertEqual(len(outputs), 2)
+ self.assertEqual(len(outputs), 3)
- self.assertEqual(outputs[0]['inputs'], [img_path] * 2)
- self.assertEqual(outputs[1]['inputs'], [img_path] * 1)
- self.assertEqual(len(outputs[0]['results']['seg_pred']), 2)
- self.assertEqual(len(outputs[1]['results']['seg_pred']), 1)
-
- for result in [outputs[0]['results'], outputs[1]['results']]:
+ for i in range(len(outputs)):
+ self.assertEqual(outputs[i]['inputs'], img_path)
self.assertListEqual(
- list(img.shape)[:2], list(result['seg_pred'][0].shape))
- self.assertListEqual(result['seg_pred'][0][1, :10].tolist(),
+ list(img.shape)[:2], list(outputs[i]['seg_pred'].shape))
+ self.assertListEqual(outputs[i]['seg_pred'][1, :10].tolist(),
[161 for i in range(10)])
- self.assertListEqual(result['seg_pred'][0][-1, -10:].tolist(),
+ self.assertListEqual(outputs[i]['seg_pred'][-1, -10:].tolist(),
[133 for i in range(10)])
def test_dump(self):
@@ -91,17 +88,47 @@ class SegmentationPredictorTest(unittest.TestCase):
total_samples = 3
outputs = predict_pipeline(
- [img_path] * total_samples, keep_inputs=True)
+ [img_path] * total_samples, keep_inputs=False)
self.assertEqual(outputs, [])
with open(tmp_path, 'rb') as f:
results = pickle.loads(f.read())
- self.assertIn('inputs', results[0])
- self.assertIn('results', results[0])
+ for res in results:
+ self.assertNotIn('inputs', res)
+ self.assertIn('seg_pred', res)
shutil.rmtree(temp_dir, ignore_errors=True)
+@unittest.skipIf(True, 'WIP')
+class Mask2formerPredictorTest(unittest.TestCase):
+
+ def test_single(self):
+ import cv2
+ from easycv.predictors.segmentation import Mask2formerPredictor
+ pan_ckpt = os.path.join(PRETRAINED_MODEL_MASK2FORMER_DIR,
+ 'mask2former_pan_export.pth')
+ instance_ckpt = os.path.join(PRETRAINED_MODEL_MASK2FORMER_DIR,
+ 'mask2former_r50_instance.pth')
+ img_path = os.path.join(TEST_IMAGES_DIR, 'mask2former.jpg')
+
+        # panoptic
+        predictor = Mask2formerPredictor(
+            model_path=pan_ckpt, task_mode='panoptic')
+ img = cv2.imread(img_path)
+ predict_out = predictor([img])
+ pan_img = predictor.show_panoptic(img, predict_out[0]['pan'])
+ cv2.imwrite('pan_out.jpg', pan_img)
+
+ # instance
+ predictor = Mask2formerPredictor(
+            model_path=instance_ckpt, task_mode='instance')
+        img = cv2.imread(img_path)
+        predict_out = predictor([img])
+ instance_img = predictor.show_instance(img, **predict_out[0])
+ cv2.imwrite('instance_out.jpg', instance_img)
+
+
if __name__ == '__main__':
unittest.main()
diff --git a/tests/predictors/test_segmentor.py b/tests/predictors/test_segmentor.py
deleted file mode 100644
index 1ca3eece..00000000
--- a/tests/predictors/test_segmentor.py
+++ /dev/null
@@ -1,48 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-"""
-isort:skip_file
-"""
-import os
-import unittest
-
-import numpy as np
-from PIL import Image
-
-from tests.ut_config import TEST_IMAGES_DIR
-from tests.ut_config import (PRETRAINED_MODEL_SEGFORMER,
- MODEL_CONFIG_SEGFORMER)
-from easycv.predictors.segmentation import SegFormerPredictor
-
-
-class SegmentorTest(unittest.TestCase):
-
- def setUp(self):
- print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
-
- def test_segformer_detector(self):
- segmentation_model_path = PRETRAINED_MODEL_SEGFORMER
- segmentation_model_config = MODEL_CONFIG_SEGFORMER
-
- img = os.path.join(TEST_IMAGES_DIR, '000000289059.jpg')
- if not os.path.exists(img):
- img = './data/test/segmentation/coco_stuff_164k/val2017/000000289059.jpg'
-
- input_data_list = [np.asarray(Image.open(img))]
- predictor = SegFormerPredictor(
- model_path=segmentation_model_path,
- model_config=segmentation_model_config)
-
- output = predictor.predict(input_data_list)[0]
- self.assertIn('seg_pred', output)
-
- self.assertListEqual(
- list(input_data_list[0].shape)[:2],
- list(output['seg_pred'][0].shape))
- self.assertListEqual(output['seg_pred'][0][1, :10].tolist(),
- [161 for i in range(10)])
- self.assertListEqual(output['seg_pred'][0][-1, -10:].tolist(),
- [133 for i in range(10)])
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/tests/ut_config.py b/tests/ut_config.py
index e053b630..42c8620b 100644
--- a/tests/ut_config.py
+++ b/tests/ut_config.py
@@ -120,10 +120,10 @@ PRETRAINED_MODEL_YOLOX_COMPRESSION = os.path.join(
BASE_LOCAL_PATH, 'pretrained_models/compression/yolox_compression.pth')
PRETRAINED_MODEL_MAE = os.path.join(
BASE_LOCAL_PATH, 'pretrained_models/classification/vit/mae_vit_b_1600.pth')
-PRETRAINED_MODEL_MASK2FORMER = os.path.join(
- BASE_LOCAL_PATH,
- 'pretrained_models/segmentation/mask2former/mask2former_r50_instance.pth')
-
+PRETRAINED_MODEL_MASK2FORMER_DIR = os.path.join(
+ BASE_LOCAL_PATH, 'pretrained_models/segmentation/mask2former/')
+PRETRAINED_MODEL_MASK2FORMER = os.path.join(PRETRAINED_MODEL_MASK2FORMER_DIR,
+ 'mask2former_r50_instance.pth')
PRETRAINED_MODEL_SEGFORMER = os.path.join(
BASE_LOCAL_PATH,
'pretrained_models/segmentation/segformer/segformer_b0/SegmentationEvaluator_mIoU_best.pth'
diff --git a/thirdparty/u2sod/sodpredictor.py b/thirdparty/u2sod/sodpredictor.py
index a336c1ac..ff981379 100644
--- a/thirdparty/u2sod/sodpredictor.py
+++ b/thirdparty/u2sod/sodpredictor.py
@@ -21,6 +21,7 @@ except:
from easycv.predictors.builder import build_predictor, PREDICTORS
+from easycv.utils.constant import CACHE_DIR
def normPRED(d):
@@ -47,8 +48,8 @@ class SODPredictor(object):
"""
def load_url_weights(name, url_index="http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/release/evtorch_thirdparty/u2net_sod/", map_location=None):
- os.makedirs('.easycv_cache', exist_ok=True)
- local_model = os.path.join('.easycv_cache', name+'.pth')
+ os.makedirs(CACHE_DIR, exist_ok=True)
+ local_model = os.path.join(CACHE_DIR, name+'.pth')
if os.path.exists(local_model):
weights = torch.load(local_model)
if weights is not None: