From 80e8504956e8dd42187cc893e58e99a3573c55a6 Mon Sep 17 00:00:00 2001 From: MengzhangLI Date: Tue, 25 Jan 2022 20:45:39 +0800 Subject: [PATCH] [Doc] Update `README.md` in configs according to latest standard. (#1233) * fix README.md in configs * fix README.md in configs * modify [ALGORITHM] to [BACKBONE] in backbone config README.md --- .dev/md2yml.py | 14 ++++---------- README.md | 2 +- README_zh-CN.md | 2 +- configs/ann/README.md | 11 ++++++----- configs/apcnet/README.md | 11 +++++------ configs/bisenetv1/README.md | 11 +++++------ configs/bisenetv2/README.md | 10 +++++----- configs/ccnet/README.md | 10 +++++----- configs/cgnet/README.md | 11 +++++------ configs/cgnet/cgnet.yml | 2 +- configs/danet/README.md | 11 +++++------ configs/deeplabv3/README.md | 16 ++++++---------- configs/deeplabv3plus/README.md | 18 +++++++----------- configs/dmnet/README.md | 11 +++++------ configs/dnlnet/README.md | 11 +++++------ configs/dpt/README.md | 11 +++++------ configs/emanet/README.md | 11 +++++------ configs/encnet/README.md | 11 +++++------ configs/erfnet/README.md | 11 +++++------ configs/fastfcn/README.md | 11 +++++------ configs/fastscnn/README.md | 11 +++++------ configs/fcn/README.md | 11 +++++------ configs/gcnet/README.md | 11 +++++------ configs/hrnet/README.md | 13 ++++++------- configs/icnet/README.md | 11 +++++------ configs/isanet/README.md | 14 ++++++++------ configs/mobilenet_v2/README.md | 13 ++++++------- configs/mobilenet_v3/README.md | 13 ++++++------- configs/nonlocal_net/README.md | 11 +++++------ configs/ocrnet/README.md | 11 +++++------ configs/point_rend/README.md | 11 +++++------ configs/psanet/README.md | 11 +++++------ configs/pspnet/README.md | 11 +++++------ configs/resnest/README.md | 13 ++++++------- configs/segformer/README.md | 11 +++++------ configs/segformer/segformer.yml | 3 ++- configs/sem_fpn/README.md | 27 +++++++++++---------------- configs/setr/README.md | 11 +++++------ configs/stdc/README.md | 11 +++++------ configs/swin/README.md | 13 ++++++------- configs/twins/README.md | 13 ++++++------- configs/unet/README.md | 11 +++++------ configs/upernet/README.md | 11 +++++------ configs/vit/README.md | 11 +++++------ configs/vit/vit.yml | 2 +- 45 files changed, 224 insertions(+), 271 deletions(-) diff --git a/.dev/md2yml.py b/.dev/md2yml.py index 4f7b876b8..4c2e129f2 100755 --- a/.dev/md2yml.py +++ b/.dev/md2yml.py @@ -87,12 +87,13 @@ def parse_md(md_file): current_dataset = '' while i < len(lines): line = lines[i].strip() + # In latest README.md the title and url are in the third line. + if i == 2: + paper_url = lines[i].split('](')[1].split(')')[0] + paper_title = lines[i].split('](')[0].split('[')[1] if len(line) == 0: i += 1 continue - if line[:2] == '# ': - paper_title = line.replace('# ', '') - i += 1 elif line[:3] == ' -
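The new `parse_md` logic above assumes the updated `README.md` layout: a short model-name heading on the first line and, on the third line (index 2), a Markdown link of the form `[Paper Title](paper-url)` that the script splits to recover the paper title and URL for the generated `.yml` metadata. A minimal standalone sketch of that extraction, using the CCNet link from this patch as a sample line (the variable names and the assert checks are illustrative only, not part of `.dev/md2yml.py`):

```python
# Sketch of the title/URL extraction added to .dev/md2yml.py in this patch.
# It assumes the third line of a refreshed README.md holds "[Paper Title](paper-url)".
# The sample line below reuses the CCNet link touched by this patch.
line = '[CCNet: Criss-Cross Attention for Semantic Segmentation](https://arxiv.org/abs/1811.11721)'

# Everything between "](" and ")" is the paper URL.
paper_url = line.split('](')[1].split(')')[0]
# Everything between "[" and "](" is the paper title.
paper_title = line.split('](')[0].split('[')[1]

assert paper_url == 'https://arxiv.org/abs/1811.11721'
assert paper_title == 'CCNet: Criss-Cross Attention for Semantic Segmentation'
```

Because the split is purely positional, `parse_md` relies on every config README placing exactly one such link on its third line, which is the convention this patch establishes across `configs/`.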
-ANN (ICCV'2019) -```latex +## Citation + +```bibtex @inproceedings{zhu2019asymmetric, title={Asymmetric non-local neural networks for semantic segmentation}, author={Zhu, Zhen and Xu, Mengde and Bai, Song and Huang, Tengteng and Bai, Xiang}, @@ -32,7 +34,6 @@ The non-local module works as a particularly useful technique for semantic segme } ``` -
## Results and models diff --git a/configs/apcnet/README.md b/configs/apcnet/README.md index 9ebb090e2..5e1fd6b42 100644 --- a/configs/apcnet/README.md +++ b/configs/apcnet/README.md @@ -1,4 +1,6 @@ -# Adaptive Pyramid Context Network for Semantic Segmentation +# APCNet + +[Adaptive Pyramid Context Network for Semantic Segmentation](https://openaccess.thecvf.com/content_CVPR_2019/html/He_Adaptive_Pyramid_Context_Network_for_Semantic_Segmentation_CVPR_2019_paper.html) ## Introduction @@ -19,10 +21,9 @@ Recent studies witnessed that context features can significantly improve the per -
-APCNet (CVPR'2019) +## Citation -```latex +```bibtex @InProceedings{He_2019_CVPR, author = {He, Junjun and Deng, Zhongying and Zhou, Lei and Wang, Yali and Qiao, Yu}, title = {Adaptive Pyramid Context Network for Semantic Segmentation}, @@ -32,8 +33,6 @@ year = {2019} } ``` -
- ## Results and models ### Cityscapes diff --git a/configs/bisenetv1/README.md b/configs/bisenetv1/README.md index 2438e902d..7a9e5faa9 100644 --- a/configs/bisenetv1/README.md +++ b/configs/bisenetv1/README.md @@ -1,4 +1,6 @@ -# BiSeNet: Bilateral Segmentation Network for Real-time Semantic Segmentation +# BiSeNetV1 + +[BiSeNet: Bilateral Segmentation Network for Real-time Semantic Segmentation](https://arxiv.org/abs/1808.00897) ## Introduction @@ -19,10 +21,9 @@ Semantic segmentation requires both rich spatial information and sizeable recept -
-BiSeNetV1 (ECCV'2018) +## Citation -```latex +```bibtex @inproceedings{yu2018bisenet, title={Bisenet: Bilateral segmentation network for real-time semantic segmentation}, author={Yu, Changqian and Wang, Jingbo and Peng, Chao and Gao, Changxin and Yu, Gang and Sang, Nong}, @@ -32,8 +33,6 @@ Semantic segmentation requires both rich spatial information and sizeable recept } ``` -
- ## Results and models ### Cityscapes diff --git a/configs/bisenetv2/README.md b/configs/bisenetv2/README.md index 825a07a0d..1bc742490 100644 --- a/configs/bisenetv2/README.md +++ b/configs/bisenetv2/README.md @@ -1,4 +1,6 @@ -# Bisenet v2: Bilateral Network with Guided Aggregation for Real-time Semantic Segmentation +# BiSeNetV2 + +[Bisenet v2: Bilateral Network with Guided Aggregation for Real-time Semantic Segmentation](https://arxiv.org/abs/2004.02147) ## Introduction @@ -19,10 +21,9 @@ The low-level details and high-level semantics are both essential to the semanti -
-BiSeNetV2 (IJCV'2021) +## Citation -```latex +```bibtex @article{yu2021bisenet, title={Bisenet v2: Bilateral network with guided aggregation for real-time semantic segmentation}, author={Yu, Changqian and Gao, Changxin and Wang, Jingbo and Yu, Gang and Shen, Chunhua and Sang, Nong}, @@ -33,7 +34,6 @@ The low-level details and high-level semantics are both essential to the semanti } ``` -
## Results and models diff --git a/configs/ccnet/README.md b/configs/ccnet/README.md index bf318f6ce..9cefcf023 100644 --- a/configs/ccnet/README.md +++ b/configs/ccnet/README.md @@ -1,4 +1,6 @@ -# CCNet: Criss-Cross Attention for Semantic Segmentation +# CCNet + +[CCNet: Criss-Cross Attention for Semantic Segmentation](https://arxiv.org/abs/1811.11721) ## Introduction @@ -19,10 +21,9 @@ Contextual information is vital in visual understanding problems, such as semant -
-CCNet (ICCV'2019) +## Citation -```latex +```bibtex @article{huang2018ccnet, title={CCNet: Criss-Cross Attention for Semantic Segmentation}, author={Huang, Zilong and Wang, Xinggang and Huang, Lichao and Huang, Chang and Wei, Yunchao and Liu, Wenyu}, @@ -31,7 +32,6 @@ Contextual information is vital in visual understanding problems, such as semant } ``` -
## Results and models diff --git a/configs/cgnet/README.md b/configs/cgnet/README.md index 69d46888d..fefb29140 100644 --- a/configs/cgnet/README.md +++ b/configs/cgnet/README.md @@ -1,4 +1,6 @@ -# CGNet: A Light-weight Context Guided Network for Semantic Segmentation +# CGNet + +[CGNet: A Light-weight Context Guided Network for Semantic Segmentation](https://arxiv.org/abs/1811.08201) ## Introduction @@ -19,10 +21,9 @@ The demand of applying semantic segmentation model on mobile devices has been in -
-CGNet (TIP'2020) +## Citation -```latext +```bibtex @article{wu2020cgnet, title={Cgnet: A light-weight context guided network for semantic segmentation}, author={Wu, Tianyi and Tang, Sheng and Zhang, Rui and Cao, Juan and Zhang, Yongdong}, journal={IEEE Transactions on Image Processing}, volume={30}, pages={1169--1179}, year={2020}, publisher={IEEE} } ``` -
- ## Results and models ### Cityscapes diff --git a/configs/cgnet/cgnet.yml b/configs/cgnet/cgnet.yml index ebb3760dd..b1506e0fe 100644 --- a/configs/cgnet/cgnet.yml +++ b/configs/cgnet/cgnet.yml @@ -4,7 +4,7 @@ Collections: Training Data: - Cityscapes Paper: - URL: https://arxiv.org/pdf/1811.08201.pdf + URL: https://arxiv.org/abs/1811.08201 Title: 'CGNet: A Light-weight Context Guided Network for Semantic Segmentation' README: configs/cgnet/README.md Code: diff --git a/configs/danet/README.md b/configs/danet/README.md index 69bc98a6f..411c59562 100644 --- a/configs/danet/README.md +++ b/configs/danet/README.md @@ -1,4 +1,6 @@ -# Dual Attention Network for Scene Segmentation +# DANet + +[Dual Attention Network for Scene Segmentation](https://arxiv.org/abs/1809.02983) ## Introduction @@ -19,10 +21,9 @@ In this paper, we address the scene segmentation task by capturing rich contextu -
-DANet (CVPR'2019) +## Citation -```latex +```bibtex @article{fu2018dual, title={Dual Attention Network for Scene Segmentation}, author={Jun Fu, Jing Liu, Haijie Tian, Yong Li, Yongjun Bao, Zhiwei Fang,and Hanqing Lu}, @@ -31,8 +32,6 @@ In this paper, we address the scene segmentation task by capturing rich contextu } ``` -
- ## Results and models ### Cityscapes diff --git a/configs/deeplabv3/README.md b/configs/deeplabv3/README.md index 6006839ac..a5d85a5ef 100644 --- a/configs/deeplabv3/README.md +++ b/configs/deeplabv3/README.md @@ -1,4 +1,6 @@ -# Rethinking atrous convolution for semantic image segmentation +# DeepLabV3 + +[Rethinking atrous convolution for semantic image segmentation](https://arxiv.org/abs/1706.05587) ## Introduction @@ -19,10 +21,9 @@ In this work, we revisit atrous convolution, a powerful tool to explicitly adjus -
-DeepLabV3 (ArXiv'2017) +## Citation -```latext +```bibtex @article{chen2017rethinking, title={Rethinking atrous convolution for semantic image segmentation}, author={Chen, Liang-Chieh and Papandreou, George and Schroff, Florian and Adam, Hartwig}, journal={arXiv preprint arXiv:1706.05587}, year={2017} } ``` -
- ## Results and models -:::{note} -`D-8` here corresponding to the output stride 8 setting for DeepLab series. -::: - ### Cityscapes | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | @@ -117,4 +112,5 @@ In this work, we revisit atrous convolution, a powerful tool to explicitly adjus Note: +- `D-8` here corresponding to the output stride 8 setting for DeepLab series. - `FP16` means Mixed Precision (FP16) is adopted in training. diff --git a/configs/deeplabv3plus/README.md b/configs/deeplabv3plus/README.md index a36ec8968..91b66dd50 100644 --- a/configs/deeplabv3plus/README.md +++ b/configs/deeplabv3plus/README.md @@ -1,4 +1,6 @@ -# Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation +# DeepLabV3+ + +[Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1802.02611) ## Introduction @@ -19,10 +21,9 @@ Spatial pyramid pooling module or encode-decoder structure are used in deep neur -
-DeepLabV3+ (CVPR'2018) +## Citation -```latex +```bibtex @inproceedings{deeplabv3plus2018, title={Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation}, author={Liang-Chieh Chen and Yukun Zhu and George Papandreou and Florian Schroff and Hartwig Adam}, @@ -31,15 +32,8 @@ Spatial pyramid pooling module or encode-decoder structure are used in deep neur } ``` -
- ## Results and models -:::{note} -`D-8`/`D-16` here corresponding to the output stride 8/16 setting for DeepLab series. -`MG-124` stands for multi-grid dilation in the last stage of ResNet. -::: - ### Cityscapes | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | @@ -122,4 +116,6 @@ Spatial pyramid pooling module or encode-decoder structure are used in deep neur Note: +- `D-8`/`D-16` here corresponding to the output stride 8/16 setting for DeepLab series. +- `MG-124` stands for multi-grid dilation in the last stage of ResNet. - `FP16` means Mixed Precision (FP16) is adopted in training. diff --git a/configs/dmnet/README.md b/configs/dmnet/README.md index 1bb497a13..0729268ca 100644 --- a/configs/dmnet/README.md +++ b/configs/dmnet/README.md @@ -1,4 +1,6 @@ -# Dynamic Multi-scale Filters for Semantic Segmentation +# DMNet + +[Dynamic Multi-scale Filters for Semantic Segmentation](https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf) ## Introduction @@ -19,10 +21,9 @@ Multi-scale representation provides an effective way toaddress scale variation o -
-DMNet (ICCV'2019) +## Citation -```latex +```bibtex @InProceedings{He_2019_ICCV, author = {He, Junjun and Deng, Zhongying and Qiao, Yu}, title = {Dynamic Multi-Scale Filters for Semantic Segmentation}, @@ -32,8 +33,6 @@ year = {2019} } ``` -
- ## Results and models ### Cityscapes diff --git a/configs/dnlnet/README.md b/configs/dnlnet/README.md index f8c6e6c75..ff335113d 100644 --- a/configs/dnlnet/README.md +++ b/configs/dnlnet/README.md @@ -1,4 +1,6 @@ -# Disentangled Non-Local Neural Networks +# DNLNet + +[Disentangled Non-Local Neural Networks](https://arxiv.org/abs/2006.06668) ## Introduction @@ -19,14 +21,13 @@ The non-local block is a popular module for strengthening the context modeling a -
-DNLNet (ECCV'2020) +## Citation This example is to reproduce ["Disentangled Non-Local Neural Networks"](https://arxiv.org/abs/2006.06668) for semantic segmentation. It is still in progress. ## Citation -```latex +```bibtex @misc{yin2020disentangled, title={Disentangled Non-Local Neural Networks}, author={Minghao Yin and Zhuliang Yao and Yue Cao and Xiu Li and Zheng Zhang and Stephen Lin and Han Hu}, @@ -35,8 +36,6 @@ This example is to reproduce ["Disentangled Non-Local Neural Networks"](https:// } ``` -
- ## Results and models (in progress) ### Cityscapes diff --git a/configs/dpt/README.md b/configs/dpt/README.md index 63d59bcc7..2fd8d32a4 100644 --- a/configs/dpt/README.md +++ b/configs/dpt/README.md @@ -1,4 +1,6 @@ -# Vision Transformer for Dense Prediction +# DPT + +[Vision Transformer for Dense Prediction](https://arxiv.org/abs/2103.13413) ## Introduction @@ -19,10 +21,9 @@ We introduce dense vision transformers, an architecture that leverages vision tr -
-DPT (ArXiv'2021) +## Citation -```latex +```bibtex @article{dosoViTskiy2020, title={An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale}, author={DosoViTskiy, Alexey and Beyer, Lucas and Kolesnikov, Alexander and Weissenborn, Dirk and Zhai, Xiaohua and Unterthiner, Thomas and Dehghani, Mostafa and Minderer, Matthias and Heigold, Georg and Gelly, Sylvain and Uszkoreit, Jakob and Houlsby, Neil}, @@ -38,8 +39,6 @@ We introduce dense vision transformers, an architecture that leverages vision tr } ``` -
- ## Usage To use other repositories' pre-trained models, it is necessary to convert keys. diff --git a/configs/emanet/README.md b/configs/emanet/README.md index 73dea783d..34dba42ec 100644 --- a/configs/emanet/README.md +++ b/configs/emanet/README.md @@ -1,4 +1,6 @@ -# Expectation-Maximization Attention Networks for Semantic Segmentation +# EMANet + +[Expectation-Maximization Attention Networks for Semantic Segmentation](https://arxiv.org/abs/1907.13426) ## Introduction @@ -19,10 +21,9 @@ Self-attention mechanism has been widely used for various tasks. It is designed -
-EMANet (ICCV'2019) +## Citation -```latex +```bibtex @inproceedings{li2019expectation, title={Expectation-maximization attention networks for semantic segmentation}, author={Li, Xia and Zhong, Zhisheng and Wu, Jianlong and Yang, Yibo and Lin, Zhouchen and Liu, Hong}, @@ -32,8 +33,6 @@ Self-attention mechanism has been widely used for various tasks. It is designed } ``` -
- ## Results and models ### Cityscapes diff --git a/configs/encnet/README.md b/configs/encnet/README.md index f5925879e..64cfe1ab8 100644 --- a/configs/encnet/README.md +++ b/configs/encnet/README.md @@ -1,4 +1,6 @@ -# Context Encoding for Semantic Segmentation +# EncNet + +[Context Encoding for Semantic Segmentation](https://arxiv.org/abs/1803.08904) ## Introduction @@ -19,10 +21,9 @@ Recent work has made significant progress in improving spatial resolution for pi -
-EncNet (CVPR'2018) +## Citation -```latex +```bibtex @InProceedings{Zhang_2018_CVPR, author = {Zhang, Hang and Dana, Kristin and Shi, Jianping and Zhang, Zhongyue and Wang, Xiaogang and Tyagi, Ambrish and Agrawal, Amit}, title = {Context Encoding for Semantic Segmentation}, @@ -32,8 +33,6 @@ year = {2018} } ``` -
- ## Results and models ### Cityscapes diff --git a/configs/erfnet/README.md b/configs/erfnet/README.md index 6d0811709..a0ccb6114 100644 --- a/configs/erfnet/README.md +++ b/configs/erfnet/README.md @@ -1,4 +1,6 @@ -# ERFNet: Efficient Residual Factorized ConvNet for Real-time Semantic Segmentation +# ERFNet + +[ERFNet: Efficient Residual Factorized ConvNet for Real-time Semantic Segmentation](http://www.robesafe.uah.es/personal/eduardo.romera/pdfs/Romera17tits.pdf) ## Introduction @@ -19,10 +21,9 @@ Semantic segmentation is a challenging task that addresses most of the perceptio -
-ERFNet (T-ITS'2017) +## Citation -```latex +```bibtex @article{romera2017erfnet, title={Erfnet: Efficient residual factorized convnet for real-time semantic segmentation}, author={Romera, Eduardo and Alvarez, Jos{\'e} M and Bergasa, Luis M and Arroyo, Roberto}, @@ -35,8 +36,6 @@ Semantic segmentation is a challenging task that addresses most of the perceptio } ``` -
- ## Results and models ### Cityscapes diff --git a/configs/fastfcn/README.md b/configs/fastfcn/README.md index f247fcf62..eea215847 100644 --- a/configs/fastfcn/README.md +++ b/configs/fastfcn/README.md @@ -1,4 +1,6 @@ -# FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation +# FastFCN + +[FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation](https://arxiv.org/abs/1903.11816) ## Introduction @@ -19,10 +21,9 @@ Modern approaches for semantic segmentation usually employ dilated convolutions -
-FastFCN (ArXiv'2019) +## Citation -```latex +```bibtex @article{wu2019fastfcn, title={Fastfcn: Rethinking dilated convolution in the backbone for semantic segmentation}, author={Wu, Huikai and Zhang, Junge and Huang, Kaiqi and Liang, Kongming and Yu, Yizhou}, @@ -31,8 +32,6 @@ year={2019} } ``` -
- ## Results and models ### Cityscapes diff --git a/configs/fastscnn/README.md b/configs/fastscnn/README.md index 3556546fe..570faff0e 100644 --- a/configs/fastscnn/README.md +++ b/configs/fastscnn/README.md @@ -1,4 +1,6 @@ -# Fast-SCNN for Semantic Segmentation +# Fast-SCNN + +[Fast-SCNN for Semantic Segmentation](https://arxiv.org/abs/1902.04502) ## Introduction @@ -19,10 +21,9 @@ The encoder-decoder framework is state-of-the-art for offline semantic image seg -
-Fast-SCNN (ArXiv'2019) +## Citation -```latex +```bibtex @article{poudel2019fast, title={Fast-scnn: Fast semantic segmentation network}, author={Poudel, Rudra PK and Liwicki, Stephan and Cipolla, Roberto}, @@ -31,8 +32,6 @@ The encoder-decoder framework is state-of-the-art for offline semantic image seg } ``` -
- ## Results and models ### Cityscapes diff --git a/configs/fcn/README.md b/configs/fcn/README.md index ff52cc41c..8863e7a3a 100644 --- a/configs/fcn/README.md +++ b/configs/fcn/README.md @@ -1,4 +1,6 @@ -# Fully Convolutional Networks for Semantic Segmentation +# FCN + +[Fully Convolutional Networks for Semantic Segmentation](https://arxiv.org/abs/1411.4038) ## Introduction @@ -19,10 +21,9 @@ Convolutional networks are powerful visual models that yield hierarchies of feat -
-FCN (CVPR'2015/TPAMI'2017) +## Citation -```latex +```bibtex @article{shelhamer2017fully, title={Fully convolutional networks for semantic segmentation}, author={Shelhamer, Evan and Long, Jonathan and Darrell, Trevor}, @@ -35,8 +36,6 @@ Convolutional networks are powerful visual models that yield hierarchies of feat } ``` -
- ## Results and models ### Cityscapes diff --git a/configs/gcnet/README.md b/configs/gcnet/README.md index b6a44b2bc..47f2f434e 100644 --- a/configs/gcnet/README.md +++ b/configs/gcnet/README.md @@ -1,4 +1,6 @@ -# GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond +# GCNet + +[GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond](https://arxiv.org/abs/1904.11492) ## Introduction @@ -19,10 +21,9 @@ The Non-Local Network (NLNet) presents a pioneering approach for capturing long- -
-GCNet (ICCVW'2019/TPAMI'2020) +## Citation -```latex +```bibtex @inproceedings{cao2019gcnet, title={Gcnet: Non-local networks meet squeeze-excitation networks and beyond}, author={Cao, Yue and Xu, Jiarui and Lin, Stephen and Wei, Fangyun and Hu, Han}, @@ -32,8 +33,6 @@ The Non-Local Network (NLNet) presents a pioneering approach for capturing long- } ``` -
- ## Results and models ### Cityscapes diff --git a/configs/hrnet/README.md b/configs/hrnet/README.md index a6de1db2d..885ec19b1 100644 --- a/configs/hrnet/README.md +++ b/configs/hrnet/README.md @@ -1,8 +1,10 @@ -# Deep High-Resolution Representation Learning for Human Pose Estimation +# HRNet + +[Deep High-Resolution Representation Learning for Human Pose Estimation](https://arxiv.org/abs/1908.07919) ## Introduction - + Official Repo @@ -19,10 +21,9 @@ High-resolution representations are essential for position-sensitive vision prob -
-HRNet (CVPR'2019) +## Citation -```latext +```bibtex @inproceedings{SunXLW19, title={Deep High-Resolution Representation Learning for Human Pose Estimation}, author={Ke Sun and Bin Xiao and Dong Liu and Jingdong Wang}, booktitle={CVPR}, year={2019} } ``` -
- ## Results and models ### Cityscapes diff --git a/configs/icnet/README.md b/configs/icnet/README.md index a1625008f..48e8b46aa 100644 --- a/configs/icnet/README.md +++ b/configs/icnet/README.md @@ -1,4 +1,6 @@ -# ICNet for Real-time Semantic Segmentation on High-resolution Images +# ICNet + +[ICNet for Real-time Semantic Segmentation on High-resolution Images](https://arxiv.org/abs/1704.08545) ## Introduction @@ -19,10 +21,9 @@ We focus on the challenging task of real-time semantic segmentation in this pape -
-ICNet (ECCV'2018) +## Citation -```latext +```bibtex @inproceedings{zhao2018icnet, title={Icnet for real-time semantic segmentation on high-resolution images}, author={Zhao, Hengshuang and Qi, Xiaojuan and Shen, Xiaoyong and Shi, Jianping and Jia, Jiaya}, booktitle={Proceedings of the European conference on computer vision (ECCV)}, pages={405--420}, year={2018} } ``` -
- ## Results and models ### Cityscapes diff --git a/configs/isanet/README.md b/configs/isanet/README.md index 7ffed5314..a120e2e3f 100644 --- a/configs/isanet/README.md +++ b/configs/isanet/README.md @@ -1,4 +1,6 @@ -# Interlaced Sparse Self-Attention for Semantic Segmentation +# ISANet + +[Interlaced Sparse Self-Attention for Semantic Segmentation](https://arxiv.org/abs/1907.12273) ## Introduction @@ -19,18 +21,20 @@ In this paper, we present a so-called interlaced sparse self-attention approach -
-ISANet (ArXiv'2019/IJCV'2021) +## Citation -``` +```bibtex @article{huang2019isa, title={Interlaced Sparse Self-Attention for Semantic Segmentation}, author={Huang, Lang and Yuan, Yuhui and Guo, Jianyuan and Zhang, Chao and Chen, Xilin and Wang, Jingdong}, journal={arXiv preprint arXiv:1907.12273}, year={2019} } +``` The technical report above is also presented at: + +```bibtex @article{yuan2021ocnet, title={OCNet: Object Context for Semantic Segmentation}, author={Yuan, Yuhui and Huang, Lang and Guo, Jianyuan and Zhang, Chao and Chen, Xilin and Wang, Jingdong}, journal={International Journal of Computer Vision}, pages={1--24}, year={2021}, publisher={Springer} } ``` -
- ## Results and models ### Cityscapes diff --git a/configs/mobilenet_v2/README.md b/configs/mobilenet_v2/README.md index 697a00611..bef889870 100644 --- a/configs/mobilenet_v2/README.md +++ b/configs/mobilenet_v2/README.md @@ -1,8 +1,10 @@ -# MobileNetV2: Inverted Residuals and Linear Bottlenecks +# MobileNetV2 + +[MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) ## Introduction - + Official Repo @@ -20,10 +22,9 @@ The MobileNetV2 architecture is based on an inverted residual structure where th -
-MobileNetV2 (CVPR'2018) +## Citation -```latex +```bibtex @inproceedings{sandler2018mobilenetv2, title={Mobilenetv2: Inverted residuals and linear bottlenecks}, author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh}, @@ -33,8 +34,6 @@ The MobileNetV2 architecture is based on an inverted residual structure where th } ``` -
- ## Results and models ### Cityscapes diff --git a/configs/mobilenet_v3/README.md b/configs/mobilenet_v3/README.md index cb1940a83..b08ac2728 100644 --- a/configs/mobilenet_v3/README.md +++ b/configs/mobilenet_v3/README.md @@ -1,8 +1,10 @@ -# Searching for MobileNetV3 +# MobileNetV3 + +[Searching for MobileNetV3](https://arxiv.org/abs/1905.02244) ## Introduction - + Official Repo @@ -19,10 +21,9 @@ We present the next generation of MobileNets based on a combination of complemen -
-MobileNetV3 (ICCV'2019) +## Citation -```latex +```bibtex @inproceedings{Howard_2019_ICCV, title={Searching for MobileNetV3}, author={Howard, Andrew and Sandler, Mark and Chu, Grace and Chen, Liang-Chieh and Chen, Bo and Tan, Mingxing and Wang, Weijun and Zhu, Yukun and Pang, Ruoming and Vasudevan, Vijay and Le, Quoc V. and Adam, Hartwig}, @@ -34,8 +35,6 @@ We present the next generation of MobileNets based on a combination of complemen } ``` -
- ## Results and models ### Cityscapes diff --git a/configs/nonlocal_net/README.md b/configs/nonlocal_net/README.md index 0a67f6a97..787d87b84 100644 --- a/configs/nonlocal_net/README.md +++ b/configs/nonlocal_net/README.md @@ -1,4 +1,6 @@ -# Non-local Neural Networks +# NonLocal Net + +[Non-local Neural Networks](https://arxiv.org/abs/1711.07971) ## Introduction @@ -19,10 +21,9 @@ Both convolutional and recurrent operations are building blocks that process one -
-NonLocal Net (CVPR'2018) +## Citation -```latex +```bibtex @inproceedings{wang2018non, title={Non-local neural networks}, author={Wang, Xiaolong and Girshick, Ross and Gupta, Abhinav and He, Kaiming}, @@ -32,8 +33,6 @@ Both convolutional and recurrent operations are building blocks that process one } ``` -
- ## Results and models ### Cityscapes diff --git a/configs/ocrnet/README.md b/configs/ocrnet/README.md index ea2eb4275..ef7312af0 100644 --- a/configs/ocrnet/README.md +++ b/configs/ocrnet/README.md @@ -1,4 +1,6 @@ -# Object-Contextual Representations for Semantic Segmentation +# OCRNet + +[Object-Contextual Representations for Semantic Segmentation](https://arxiv.org/abs/1909.11065) ## Introduction @@ -19,10 +21,9 @@ In this paper, we address the problem of semantic segmentation and focus on the -
-OCRNet (ECCV'2020) +## Citation -```latex +```bibtex @article{YuanW18, title={Ocnet: Object context network for scene parsing}, author={Yuhui Yuan and Jingdong Wang}, @@ -38,8 +39,6 @@ In this paper, we address the problem of semantic segmentation and focus on the } ``` -
- ## Results and models ### Cityscapes diff --git a/configs/point_rend/README.md b/configs/point_rend/README.md index 093047741..34448e36b 100644 --- a/configs/point_rend/README.md +++ b/configs/point_rend/README.md @@ -1,4 +1,6 @@ -# PointRend: Image Segmentation as Rendering +# PointRend + +[PointRend: Image Segmentation as Rendering](https://arxiv.org/abs/1912.08193) ## Introduction @@ -19,10 +21,9 @@ We present a new method for efficient high-quality image segmentation of objects -
-PointRend (CVPR'2020) +## Citation -``` +```bibtex @inproceedings{kirillov2020pointrend, title={Pointrend: Image segmentation as rendering}, author={Kirillov, Alexander and Wu, Yuxin and He, Kaiming and Girshick, Ross}, @@ -32,8 +33,6 @@ We present a new method for efficient high-quality image segmentation of objects } ``` -
- ## Results and models ### Cityscapes diff --git a/configs/psanet/README.md b/configs/psanet/README.md index 06126166e..fede7d439 100644 --- a/configs/psanet/README.md +++ b/configs/psanet/README.md @@ -1,4 +1,6 @@ -# PSANet: Point-wise Spatial Attention Network for Scene Parsing +# PSANet + +[PSANet: Point-wise Spatial Attention Network for Scene Parsing](https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf) ## Introduction @@ -19,10 +21,9 @@ We notice information flow in convolutional neural networksis restricted insid -
-PSANet (ECCV'2018) +## Citation -```latex +```bibtex @inproceedings{zhao2018psanet, title={Psanet: Point-wise spatial attention network for scene parsing}, author={Zhao, Hengshuang and Zhang, Yi and Liu, Shu and Shi, Jianping and Change Loy, Chen and Lin, Dahua and Jia, Jiaya}, @@ -32,8 +33,6 @@ We notice information flow in convolutional neural networksis restricted insid } ``` -
- ## Results and models ### Cityscapes diff --git a/configs/pspnet/README.md b/configs/pspnet/README.md index 25f021bbb..ca8bddabb 100644 --- a/configs/pspnet/README.md +++ b/configs/pspnet/README.md @@ -1,4 +1,6 @@ -# Pyramid Scene Parsing Network +# PSPNet + +[Pyramid Scene Parsing Network](https://arxiv.org/abs/1612.01105) ## Introduction @@ -19,10 +21,9 @@ Scene parsing is challenging for unrestricted open vocabulary and diverse scenes -
-PSPNet (CVPR'2017) +## Citation -```latex +```bibtex @inproceedings{zhao2017pspnet, title={Pyramid Scene Parsing Network}, author={Zhao, Hengshuang and Shi, Jianping and Qi, Xiaojuan and Wang, Xiaogang and Jia, Jiaya}, @@ -31,8 +32,6 @@ Scene parsing is challenging for unrestricted open vocabulary and diverse scenes } ``` -
- ## Results and models ### Cityscapes diff --git a/configs/resnest/README.md b/configs/resnest/README.md index 7b29a9b4f..fbabf98e3 100644 --- a/configs/resnest/README.md +++ b/configs/resnest/README.md @@ -1,8 +1,10 @@ -# ResNeSt: Split-Attention Networks +# ResNeSt + +[ResNeSt: Split-Attention Networks](https://arxiv.org/abs/2004.08955) ## Introduction - + Official Repo @@ -19,10 +21,9 @@ It is well known that featuremap attention and multi-path representation are imp -
-ResNeSt (ArXiv'2020) +## Citation -```latex +```bibtex @article{zhang2020resnest, title={ResNeSt: Split-Attention Networks}, author={Zhang, Hang and Wu, Chongruo and Zhang, Zhongyue and Zhu, Yi and Zhang, Zhi and Lin, Haibin and Sun, Yue and He, Tong and Muller, Jonas and Manmatha, R. and Li, Mu and Smola, Alexander}, @@ -31,8 +32,6 @@ year={2020} } ``` -
- ## Results and models ### Cityscapes diff --git a/configs/segformer/README.md b/configs/segformer/README.md index 560696072..790c0f519 100644 --- a/configs/segformer/README.md +++ b/configs/segformer/README.md @@ -1,4 +1,6 @@ -# SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers +# SegFormer + +[SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) ## Introduction @@ -19,10 +21,9 @@ We present SegFormer, a simple, efficient yet powerful semantic segmentation fra -
-SegFormer (ArXiv'2021) +## Citation -```latex +```bibtex @article{xie2021segformer, title={SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers}, author={Xie, Enze and Wang, Wenhai and Yu, Zhiding and Anandkumar, Anima and Alvarez, Jose M and Luo, Ping}, @@ -31,8 +32,6 @@ We present SegFormer, a simple, efficient yet powerful semantic segmentation fra } ``` -
- ## Usage To use other repositories' pre-trained models, it is necessary to convert keys. diff --git a/configs/segformer/segformer.yml b/configs/segformer/segformer.yml index 4d9453257..c4efc582a 100644 --- a/configs/segformer/segformer.yml +++ b/configs/segformer/segformer.yml @@ -6,7 +6,8 @@ Collections: - Cityscapes Paper: URL: https://arxiv.org/abs/2105.15203 - Title: resize image to multiple of 32, improve SegFormer by 0.5-1.0 mIoU. + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' README: configs/segformer/README.md Code: URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 diff --git a/configs/sem_fpn/README.md b/configs/sem_fpn/README.md index e7e87132a..a3732fdfd 100644 --- a/configs/sem_fpn/README.md +++ b/configs/sem_fpn/README.md @@ -1,4 +1,6 @@ -# Panoptic Feature Pyramid Networks +# Semantic FPN + +[Panoptic Feature Pyramid Networks](https://arxiv.org/abs/1901.02446) ## Introduction @@ -19,25 +21,18 @@ The recently introduced panoptic segmentation task has renewed our community's i -
-Semantic FPN (CVPR'2019) +## Citation -```latex -@article{Kirillov_2019, - title={Panoptic Feature Pyramid Networks}, - ISBN={9781728132938}, - url={http://dx.doi.org/10.1109/CVPR.2019.00656}, - DOI={10.1109/cvpr.2019.00656}, - journal={2019 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, - publisher={IEEE}, - author={Kirillov, Alexander and Girshick, Ross and He, Kaiming and Dollar, Piotr}, - year={2019}, - month={Jun} +```bibtex +@inproceedings{kirillov2019panoptic, + title={Panoptic feature pyramid networks}, + author={Kirillov, Alexander and Girshick, Ross and He, Kaiming and Doll{\'a}r, Piotr}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={6399--6408}, + year={2019} } ``` -
- ## Results and models ### Cityscapes diff --git a/configs/setr/README.md b/configs/setr/README.md index d1eb0260f..5673d9b63 100644 --- a/configs/setr/README.md +++ b/configs/setr/README.md @@ -1,4 +1,6 @@ -# Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective with Transformers +# SETR + +[Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective with Transformers](https://arxiv.org/abs/2012.15840) ## Introduction @@ -23,10 +25,9 @@ Most recent semantic segmentation methods adopt a fully-convolutional network (F This head has two version head. ``` -
-SETR (CVPR'2021) +## Citation -```latex +```bibtex @article{zheng2020rethinking, title={Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective with Transformers}, author={Zheng, Sixiao and Lu, Jiachen and Zhao, Hengshuang and Zhu, Xiatian and Luo, Zekun and Wang, Yabiao and Fu, Yanwei and Feng, Jianfeng and Xiang, Tao and Torr, Philip HS and others}, @@ -35,8 +36,6 @@ This head has two version head. } ``` -
- ## Results and models ### ADE20K diff --git a/configs/stdc/README.md b/configs/stdc/README.md index 11fe3d677..4fece6fe3 100644 --- a/configs/stdc/README.md +++ b/configs/stdc/README.md @@ -1,4 +1,6 @@ -# Rethinking BiSeNet For Real-time Semantic Segmentation +# STDC + +[Rethinking BiSeNet For Real-time Semantic Segmentation](https://arxiv.org/abs/2104.13188) ## Introduction @@ -19,10 +21,9 @@ BiSeNet has been proved to be a popular two-stream network for real-time segment -
-STDC (CVPR'2021) +## Citation -```latex +```bibtex @inproceedings{fan2021rethinking, title={Rethinking BiSeNet For Real-time Semantic Segmentation}, author={Fan, Mingyuan and Lai, Shenqi and Huang, Junshi and Wei, Xiaoming and Chai, Zhenhua and Luo, Junfeng and Wei, Xiaolin}, @@ -32,8 +33,6 @@ BiSeNet has been proved to be a popular two-stream network for real-time segment } ``` -
- ## Usage To use original repositories' [ImageNet Pretrained STDCNet Weights](https://drive.google.com/drive/folders/1wROFwRt8qWHD4jSo8Zu1gp1d6oYJ3ns1) , it is necessary to convert keys. diff --git a/configs/swin/README.md b/configs/swin/README.md index 422133153..ac4bd56e7 100644 --- a/configs/swin/README.md +++ b/configs/swin/README.md @@ -1,8 +1,10 @@ -# Swin Transformer: Hierarchical Vision Transformer using Shifted Windows +# Swin Transformer + +[Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/abs/2103.14030) ## Introduction - + Official Repo @@ -19,10 +21,9 @@ This paper presents a new vision Transformer, called Swin Transformer, that capa -
-Swin Transformer (arXiv'2021) +## Citation -```latex +```bibtex @article{liu2021Swin, title={Swin Transformer: Hierarchical Vision Transformer using Shifted Windows}, author={Liu, Ze and Lin, Yutong and Cao, Yue and Hu, Han and Wei, Yixuan and Zhang, Zheng and Lin, Stephen and Guo, Baining}, @@ -31,8 +32,6 @@ This paper presents a new vision Transformer, called Swin Transformer, that capa } ``` -
- ## Usage To use other repositories' pre-trained models, it is necessary to convert keys. diff --git a/configs/twins/README.md b/configs/twins/README.md index 0dbb41e6f..0ecb79d01 100644 --- a/configs/twins/README.md +++ b/configs/twins/README.md @@ -1,8 +1,10 @@ -# Twins: Revisiting the Design of Spatial Attention in Vision Transformers +# Twins + +[Twins: Revisiting the Design of Spatial Attention in Vision Transformers](https://arxiv.org/pdf/2104.13840.pdf) ## Introduction - + Official Repo @@ -19,10 +21,9 @@ Very recently, a variety of vision transformer architectures for dense predictio -
- Twins (NeurIPS'2021) +## Citation -```latex +```bibtex @article{chu2021twins, title={Twins: Revisiting spatial attention design in vision transformers}, author={Chu, Xiangxiang and Tian, Zhi and Wang, Yuqing and Zhang, Bo and Ren, Haibing and Wei, Xiaolin and Xia, Huaxia and Shen, Chunhua}, @@ -31,8 +32,6 @@ Very recently, a variety of vision transformer architectures for dense predictio } ``` -
- ## Usage To use other repositories' pre-trained models, it is necessary to convert keys. diff --git a/configs/unet/README.md b/configs/unet/README.md index 727ecf5e3..96bc98d7a 100644 --- a/configs/unet/README.md +++ b/configs/unet/README.md @@ -1,4 +1,6 @@ -# U-Net: Convolutional Networks for Biomedical Image Segmentation +# UNet + +[U-Net: Convolutional Networks for Biomedical Image Segmentation](https://arxiv.org/abs/1505.04597) ## Introduction @@ -19,10 +21,9 @@ There is large consent that successful training of deep networks requires many t -
-UNet (MICCAI'2016/Nat. Methods'2019) +## Citation -```latex +```bibtex @inproceedings{ronneberger2015u, title={U-net: Convolutional networks for biomedical image segmentation}, author={Ronneberger, Olaf and Fischer, Philipp and Brox, Thomas}, @@ -33,8 +34,6 @@ There is large consent that successful training of deep networks requires many t } ``` -
- ## Results and models ### Cityscapes diff --git a/configs/upernet/README.md b/configs/upernet/README.md index 50e788e12..0ab3cb3d3 100644 --- a/configs/upernet/README.md +++ b/configs/upernet/README.md @@ -1,4 +1,6 @@ -# Unified Perceptual Parsing for Scene Understanding +# UPerNet + +[Unified Perceptual Parsing for Scene Understanding](https://arxiv.org/pdf/1807.10221.pdf) ## Introduction @@ -19,10 +21,9 @@ Humans recognize the visual world at multiple levels: we effortlessly categorize -
-UPerNet (ECCV'2018) +## Citation -```latex +```bibtex @inproceedings{xiao2018unified, title={Unified perceptual parsing for scene understanding}, author={Xiao, Tete and Liu, Yingcheng and Zhou, Bolei and Jiang, Yuning and Sun, Jian}, @@ -32,8 +33,6 @@ Humans recognize the visual world at multiple levels: we effortlessly categorize } ``` -
- ## Results and models ### Cityscapes diff --git a/configs/vit/README.md b/configs/vit/README.md index ac06f5169..eec65b52e 100644 --- a/configs/vit/README.md +++ b/configs/vit/README.md @@ -1,8 +1,10 @@ # Vision Transformer +[An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/pdf/2010.11929.pdf) + ## Introduction - + Official Repo @@ -19,10 +21,9 @@ While the Transformer architecture has become the de-facto standard for natural -
-Vision Transformer (ICLR'2021) +## Citation -```latex +```bibtex @article{dosoViTskiy2020, title={An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale}, author={DosoViTskiy, Alexey and Beyer, Lucas and Kolesnikov, Alexander and Weissenborn, Dirk and Zhai, Xiaohua and Unterthiner, Thomas and Dehghani, Mostafa and Minderer, Matthias and Heigold, Georg and Gelly, Sylvain and Uszkoreit, Jakob and Houlsby, Neil}, @@ -31,8 +32,6 @@ While the Transformer architecture has become the de-facto standard for natural } ``` -
- ## Usage To use other repositories' pre-trained models, it is necessary to convert keys. diff --git a/configs/vit/vit.yml b/configs/vit/vit.yml index 9d6449b0a..9e3b02e5a 100644 --- a/configs/vit/vit.yml +++ b/configs/vit/vit.yml @@ -5,7 +5,7 @@ Collections: - ADE20K Paper: URL: https://arxiv.org/pdf/2010.11929.pdf - Title: Vision Transformer + Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale' README: configs/vit/README.md Code: URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98