From 347b49e77fa0643740bb4fc4570699b106d6cdb3 Mon Sep 17 00:00:00 2001 From: Yixiao Fang <36138628+fangyixiao18@users.noreply.github.com> Date: Wed, 21 Dec 2022 16:21:12 +0800 Subject: [PATCH] [Refactor] Refactor t-SNE (#629) * refactor tsne * update configs * update docs * add cls_token option --- .../classification/tsne_imagenet.py | 26 ---- configs/tsne/resnet50_imagenet.py | 48 +++++++ configs/tsne/swin-base_imagenet.py | 42 ++++++ configs/tsne/vit-base-p16_imagenet.py | 45 ++++++ docs/en/user_guides/visualization.md | 19 ++- tools/analysis_tools/visualize_tsne.py | 134 ++++++++---------- 6 files changed, 207 insertions(+), 107 deletions(-) delete mode 100644 configs/benchmarks/classification/tsne_imagenet.py create mode 100644 configs/tsne/resnet50_imagenet.py create mode 100644 configs/tsne/swin-base_imagenet.py create mode 100644 configs/tsne/vit-base-p16_imagenet.py diff --git a/configs/benchmarks/classification/tsne_imagenet.py b/configs/benchmarks/classification/tsne_imagenet.py deleted file mode 100644 index 0dc0bf68..00000000 --- a/configs/benchmarks/classification/tsne_imagenet.py +++ /dev/null @@ -1,26 +0,0 @@ -dataset_type = 'mmcls.ImageNet' -data_root = 'data/imagenet/' -file_client_args = dict(backend='disk') -name = 'imagenet_val' - -extract_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), - dict(type='mmcls.ResizeEdge', scale=256, edge='short'), - dict(type='CenterCrop', crop_size=224), - dict(type='PackSelfSupInputs'), -] - -extract_dataloader = dict( - batch_size=8, - num_workers=4, - dataset=dict( - type=dataset_type, - data_root='data/imagenet', - ann_file='meta/val.txt', - data_prefix='val', - pipeline=extract_pipeline), - sampler=dict(type='DefaultSampler', shuffle=False), -) - -# pooling cfg -pool_cfg = dict(type='MultiPooling', in_indices=(1, 2, 3, 4)) diff --git a/configs/tsne/resnet50_imagenet.py b/configs/tsne/resnet50_imagenet.py new file mode 100644 index 00000000..7d93906c --- /dev/null +++ 
b/configs/tsne/resnet50_imagenet.py @@ -0,0 +1,48 @@ +_base_ = 'mmcls::_base_/default_runtime.py' + +model = dict( + _scope_='mmcls', + type='ImageClassifier', + data_preprocessor=dict( + num_classes=1000, + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True, + ), + backbone=dict( + type='ResNet', + depth=50, + in_channels=3, + num_stages=4, + out_indices=(3, ), # one-element tuple; (3) would be the int 3 + norm_cfg=dict(type='BN'), + frozen_stages=-1), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='LinearClsHead', + num_classes=1000, + in_channels=2048, + loss=dict(type='CrossEntropyLoss', loss_weight=1.0), + topk=(1, 5), + )) + +dataset_type = 'mmcls.ImageNet' +data_root = 'data/imagenet/' +file_client_args = dict(backend='disk') +extract_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmcls.ResizeEdge', scale=256, edge='short'), + dict(type='CenterCrop', crop_size=224), + dict(type='mmcls.PackClsInputs'), +] +extract_dataloader = dict( + batch_size=8, + num_workers=4, + dataset=dict( + type=dataset_type, + data_root='data/imagenet', + ann_file='meta/val.txt', + data_prefix='val', + pipeline=extract_pipeline), + sampler=dict(type='DefaultSampler', shuffle=False), +) diff --git a/configs/tsne/swin-base_imagenet.py b/configs/tsne/swin-base_imagenet.py new file mode 100644 index 00000000..64827a66 --- /dev/null +++ b/configs/tsne/swin-base_imagenet.py @@ -0,0 +1,42 @@ +_base_ = 'mmcls::_base_/default_runtime.py' + +model = dict( + _scope_='mmcls', + type='ImageClassifier', + backbone=dict( + type='SwinTransformer', + arch='base', + img_size=192, + out_indices=-1, + drop_path_rate=0.1, + stage_cfgs=dict(block_cfgs=dict(window_size=6))), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='LinearClsHead', + num_classes=1000, + in_channels=1024, + init_cfg=None, # suppress the default init_cfg of LinearClsHead. 
+ loss=dict( + type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), + cal_acc=False)) + +dataset_type = 'mmcls.ImageNet' +data_root = 'data/imagenet/' +file_client_args = dict(backend='disk') +extract_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmcls.ResizeEdge', scale=256, edge='short'), + dict(type='CenterCrop', crop_size=224), + dict(type='mmcls.PackClsInputs'), +] +extract_dataloader = dict( + batch_size=8, + num_workers=4, + dataset=dict( + type=dataset_type, + data_root='data/imagenet', + ann_file='meta/val.txt', + data_prefix='val', + pipeline=extract_pipeline), + sampler=dict(type='DefaultSampler', shuffle=False), +) diff --git a/configs/tsne/vit-base-p16_imagenet.py b/configs/tsne/vit-base-p16_imagenet.py new file mode 100644 index 00000000..1569e589 --- /dev/null +++ b/configs/tsne/vit-base-p16_imagenet.py @@ -0,0 +1,45 @@ +_base_ = 'mmcls::_base_/default_runtime.py' + +model = dict( + _scope_='mmcls', + type='ImageClassifier', + backbone=dict( + type='VisionTransformer', + arch='base', + img_size=224, + patch_size=16, + out_indices=-1, + drop_path_rate=0.1, + avg_token=False, + output_cls_token=False, + final_norm=False), + neck=None, + head=dict( + type='LinearClsHead', + num_classes=1000, + in_channels=768, + loss=dict( + type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), + init_cfg=[dict(type='TruncNormal', layer='Linear', std=2e-5)]), +) + +dataset_type = 'mmcls.ImageNet' +data_root = 'data/imagenet/' +file_client_args = dict(backend='disk') +extract_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmcls.ResizeEdge', scale=256, edge='short'), + dict(type='CenterCrop', crop_size=224), + dict(type='mmcls.PackClsInputs'), +] +extract_dataloader = dict( + batch_size=8, + num_workers=4, + dataset=dict( + type=dataset_type, + data_root='data/imagenet', + ann_file='meta/val.txt', + data_prefix='val', + pipeline=extract_pipeline), + 
sampler=dict(type='DefaultSampler', shuffle=False), +) diff --git a/docs/en/user_guides/visualization.md b/docs/en/user_guides/visualization.md index 6f167370..3f6e710a 100644 --- a/docs/en/user_guides/visualization.md +++ b/docs/en/user_guides/visualization.md @@ -123,21 +123,26 @@ python tools/analysis_tools/visualize_tsne.py ${CONFIG_FILE} --checkpoint ${CKPT Arguments: -- `CONFIG_FILE`: config file for the pre-trained model. -- `CKPT_PATH`: the path of model's checkpoint. +- `CONFIG_FILE`: config file for t-SNE, which is listed in the directory `configs/tsne/` +- `CKPT_PATH`: the path or link of the model's checkpoint. - `WORK_DIR`: the directory to save the results of visualization. -- `[optional arguments]`: for optional arguments, you can refer to [visualize_tsne.py](https://github.com/open-mmlab/mmselfsup/blob/master/tools/analysis_tools/visualize_tsne.py) +- `[optional arguments]`: for optional arguments, you can refer to [visualize_tsne.py](https://github.com/open-mmlab/mmselfsup/blob/dev-1.x/tools/analysis_tools/visualize_tsne.py) -An example: +An example command: ```shell -python tools/analysis_tools/visualize_tsne.py configs/selfsup/simsiam/simsiam_resnet50_8xb32-coslr-100e_in1k.py --checkpoint epoch_100.pth --work-dir work_dirs/selfsup/simsiam_resnet50_8xb32-coslr-200e_in1k +python ./tools/analysis_tools/visualize_tsne.py \ + configs/tsne/resnet50_imagenet.py \ + --checkpoint https://download.openmmlab.com/mmselfsup/1.x/mocov2/mocov2_resnet50_8xb32-coslr-200e_in1k/mocov2_resnet50_8xb32-coslr-200e_in1k_20220825-b6d23c86.pth \ + --work-dir ./work_dirs/tsne/mocov2/ \ + --max-num-class 100 ``` -An example of visualization: +An example of visualization, the left is from `MoCoV2_ResNet50` and the right is from `MAE_ViT-base`: