Merge pull request #59 from cuhk-hbsun/develop

v0.1.0
Hongbin Sun 2021-04-06 23:38:35 +08:00 committed by GitHub
commit d1cc0dad44
392 changed files with 46272 additions and 12 deletions

3
.coveragerc 100644

@@ -0,0 +1,3 @@
[run]
omit =
    */__init__.py

76
.github/CODE_OF_CONDUCT.md vendored 100644

@@ -0,0 +1,76 @@
# Contributor Covenant Code of Conduct
## Our Pledge
In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to making participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.
## Our Standards
Examples of behavior that contributes to creating a positive environment
include:
* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members
Examples of unacceptable behavior by participants include:
* The use of sexualized language or imagery and unwelcome sexual attention or
advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Our Responsibilities
Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.
Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.
## Scope
This Code of Conduct applies both within project spaces and in public spaces
when an individual is representing the project or its community. Examples of
representing a project or community include using an official project e-mail
address, posting via an official social media account, or acting as an appointed
representative at an online or offline event. Representation of a project may be
further defined and clarified by project maintainers.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at chenkaidev@gmail.com. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.
Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see
https://www.contributor-covenant.org/faq

1
.github/CONTRIBUTING.md vendored 100644

@@ -0,0 +1 @@
We appreciate all contributions to improve MMOCR. Please refer to [CONTRIBUTING.md](/docs/contributing.md) in MMCV for more details about the contributing guideline.

68
.github/workflows/build.yml vendored 100644

@@ -0,0 +1,68 @@
name: build
on:
  push:
    branches:
      - master
  pull_request:
jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python 3.7
        uses: actions/setup-python@v2
        with:
          python-version: 3.7
      - name: Install pre-commit hook
        run: |
          pip install pre-commit
          pre-commit install
      - name: Linting
        run: pre-commit run --all-files
      - name: Check docstring coverage
        run: |
          pip install interrogate
          interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --ignore-regex "__repr__" --fail-under 50 mmocr
  build_cpu:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: [3.7]
        torch: [1.5.0, 1.6.0, 1.7.0]
        include:
          - torch: 1.5.0
            torchvision: 0.6.0
          - torch: 1.6.0
            torchvision: 0.7.0
          - torch: 1.7.0
            torchvision: 0.8.1
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Upgrade pip
        run: pip install pip --upgrade
      - name: Install Pillow
        run: pip install Pillow==6.2.2
        if: ${{matrix.torchvision == '0.4.1'}}
      - name: Install PyTorch
        run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html
      - name: Install MMCV
        run: pip install mmcv-full==1.3.0 -f https://download.openmmlab.com/mmcv/dist/cpu/torch${{matrix.torch}}/index.html
      - name: Install MMDet
        run: pip install git+https://github.com/open-mmlab/mmdetection/
      - name: Install other dependencies
        run: pip install -r requirements.txt
      - name: Build and install
        run: rm -rf .eggs && pip install -e .
      - name: Run unittests and generate coverage report
        run: |
          coverage run --branch --source mmocr -m pytest tests/
          coverage xml
          coverage report -m

.github/workflows/deploy.yml vendored 100644

@@ -0,0 +1,20 @@
name: deploy
on: push
jobs:
  build-n-publish:
    runs-on: ubuntu-latest
    if: startsWith(github.event.ref, 'refs/tags')
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python 3.7
        uses: actions/setup-python@v1
        with:
          python-version: 3.7
      - name: Build MMOCR
        run: python setup.py sdist
      - name: Publish distribution to PyPI
        run: |
          pip install twine
          twine upload dist/* -u __token__ -p ${{ secrets.pypi_password }}

138
.gitignore vendored 100644

@@ -0,0 +1,138 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
*.ipynb
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# cython generated cpp
!data/dict
data/*
.vscode
.idea
# custom
*.pkl
*.pkl.json
*.log.json
work_dirs/
exps/
*~
show_dir/
# Pytorch
*.pth
# demo
!tests/data
tests/results
#temp files
.DS_Store
checkpoints
htmlcov
*.swp
log.txt
workspace.code-workspace
results

.pre-commit-config.yaml 100644

@@ -0,0 +1,36 @@
exclude: ^tests/data/
repos:
  - repo: https://gitlab.com/pycqa/flake8
    rev: 3.8.1
    hooks:
      - id: flake8
  - repo: https://github.com/asottile/seed-isort-config
    rev: v2.2.0
    hooks:
      - id: seed-isort-config
  - repo: https://github.com/timothycrosley/isort
    rev: 4.3.21
    hooks:
      - id: isort
  - repo: https://github.com/pre-commit/mirrors-yapf
    rev: v0.30.0
    hooks:
      - id: yapf
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v3.1.0
    hooks:
      - id: trailing-whitespace
      - id: check-yaml
      - id: end-of-file-fixer
      - id: requirements-txt-fixer
      - id: double-quote-string-fixer
      - id: check-merge-conflict
      - id: fix-encoding-pragma
        args: ["--remove"]
      - id: mixed-line-ending
        args: ["--fix=lf"]
  - repo: https://github.com/myint/docformatter
    rev: v1.3.1
    hooks:
      - id: docformatter
        args: ["--in-place", "--wrap-descriptions", "79"]

621
.pylintrc 100644

@@ -0,0 +1,621 @@
[MASTER]
# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loading into the active Python interpreter and may
# run arbitrary code.
extension-pkg-whitelist=
# Specify a score threshold to be exceeded before program exits with error.
fail-under=10.0
# Add files or directories to the blacklist. They should be base names, not
# paths.
ignore=CVS,configs
# Add files or directories matching the regex patterns to the blacklist. The
# regex matches against base names, not paths.
ignore-patterns=
# Python code to execute, usually for sys.path manipulation such as
# pygtk.require().
#init-hook=
# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
# number of processors available to use.
jobs=1
# Control the amount of potential inferred values when inferring a single
# object. This can help the performance when dealing with large functions or
# complex, nested conditions.
limit-inference-results=100
# List of plugins (as comma separated values of python module names) to load,
# usually to register additional checkers.
load-plugins=
# Pickle collected data for later comparisons.
persistent=yes
# When enabled, pylint would attempt to guess common misconfiguration and emit
# user-friendly hints instead of false-positive error messages.
suggestion-mode=yes
# Allow loading of arbitrary C extensions. Extensions are imported into the
# active Python interpreter and may run arbitrary code.
unsafe-load-any-extension=no
[MESSAGES CONTROL]
# Only show warnings with the listed confidence levels. Leave empty to show
# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED.
confidence=
# Disable the message, report, category or checker with the given id(s). You
# can either give multiple identifiers separated by comma (,) or put this
# option multiple times (only on the command line, not in the configuration
# file where it should appear only once). You can also use "--disable=all" to
# disable everything first and then reenable specific checks. For example, if
# you want to run only the similarities checker, you can use "--disable=all
# --enable=similarities". If you want to run only the classes checker, but have
# no Warning level messages displayed, use "--disable=all --enable=classes
# --disable=W".
disable=print-statement,
        parameter-unpacking,
        unpacking-in-except,
        old-raise-syntax,
        backtick,
        long-suffix,
        old-ne-operator,
        old-octal-literal,
        import-star-module-level,
        non-ascii-bytes-literal,
        raw-checker-failed,
        bad-inline-option,
        locally-disabled,
        file-ignored,
        suppressed-message,
        useless-suppression,
        deprecated-pragma,
        use-symbolic-message-instead,
        apply-builtin,
        basestring-builtin,
        buffer-builtin,
        cmp-builtin,
        coerce-builtin,
        execfile-builtin,
        file-builtin,
        long-builtin,
        raw_input-builtin,
        reduce-builtin,
        standarderror-builtin,
        unicode-builtin,
        xrange-builtin,
        coerce-method,
        delslice-method,
        getslice-method,
        setslice-method,
        no-absolute-import,
        old-division,
        dict-iter-method,
        dict-view-method,
        next-method-called,
        metaclass-assignment,
        indexing-exception,
        raising-string,
        reload-builtin,
        oct-method,
        hex-method,
        nonzero-method,
        cmp-method,
        input-builtin,
        round-builtin,
        intern-builtin,
        unichr-builtin,
        map-builtin-not-iterating,
        zip-builtin-not-iterating,
        range-builtin-not-iterating,
        filter-builtin-not-iterating,
        using-cmp-argument,
        eq-without-hash,
        div-method,
        idiv-method,
        rdiv-method,
        exception-message-attribute,
        invalid-str-codec,
        sys-max-int,
        bad-python3-import,
        deprecated-string-function,
        deprecated-str-translate-call,
        deprecated-itertools-function,
        deprecated-types-field,
        next-method-defined,
        dict-items-not-iterating,
        dict-keys-not-iterating,
        dict-values-not-iterating,
        deprecated-operator-function,
        deprecated-urllib-function,
        xreadlines-attribute,
        deprecated-sys-function,
        exception-escape,
        comprehension-escape,
        no-member,
        invalid-name,
        too-many-branches,
        wrong-import-order,
        too-many-arguments,
        missing-function-docstring,
        missing-module-docstring,
        too-many-locals,
        too-few-public-methods,
        abstract-method,
        broad-except,
        too-many-nested-blocks,
        too-many-instance-attributes,
        missing-class-docstring,
        duplicate-code,
        not-callable,
        protected-access,
        dangerous-default-value,
        no-name-in-module,
        logging-fstring-interpolation,
        super-init-not-called,
        redefined-builtin,
        attribute-defined-outside-init,
        arguments-differ,
        cyclic-import,
        bad-super-call,
        too-many-statements
# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifier separated by comma (,) or put this option
# multiple time (only on the command line, not in the configuration file where
# it should appear only once). See also the "--disable" option for examples.
enable=c-extension-no-member
[REPORTS]
# Python expression which should return a score less than or equal to 10. You
# have access to the variables 'error', 'warning', 'refactor', and 'convention'
# which contain the number of messages in each category, as well as 'statement'
# which is the total number of statements analyzed. This score is used by the
# global evaluation report (RP0004).
evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
# Template used to display messages. This is a python new-style format string
# used to format the message information. See doc for all details.
#msg-template=
# Set the output format. Available formats are text, parseable, colorized, json
# and msvs (visual studio). You can also give a reporter class, e.g.
# mypackage.mymodule.MyReporterClass.
output-format=text
# Tells whether to display a full report or only the messages.
reports=no
# Activate the evaluation score.
score=yes
[REFACTORING]
# Maximum number of nested blocks for function / method body
max-nested-blocks=5
# Complete name of functions that never returns. When checking for
# inconsistent-return-statements if a never returning function is called then
# it will be considered as an explicit return statement and no message will be
# printed.
never-returning-functions=sys.exit
[TYPECHECK]
# List of decorators that produce context managers, such as
# contextlib.contextmanager. Add to this list to register other decorators that
# produce valid context managers.
contextmanager-decorators=contextlib.contextmanager
# List of members which are set dynamically and missed by pylint inference
# system, and so shouldn't trigger E1101 when accessed. Python regular
# expressions are accepted.
generated-members=
# Tells whether missing members accessed in mixin class should be ignored. A
# mixin class is detected if its name ends with "mixin" (case insensitive).
ignore-mixin-members=yes
# Tells whether to warn about missing members when the owner of the attribute
# is inferred to be None.
ignore-none=yes
# This flag controls whether pylint should warn about no-member and similar
# checks whenever an opaque object is returned when inferring. The inference
# can return multiple potential results while evaluating a Python object, but
# some branches might not be evaluated, which results in partial inference. In
# that case, it might be useful to still emit no-member and other checks for
# the rest of the inferred objects.
ignore-on-opaque-inference=yes
# List of class names for which member attributes should not be checked (useful
# for classes with dynamically set attributes). This supports the use of
# qualified names.
ignored-classes=optparse.Values,thread._local,_thread._local
# List of module names for which member attributes should not be checked
# (useful for modules/projects where namespaces are manipulated during runtime
# and thus existing member attributes cannot be deduced by static analysis). It
# supports qualified module names, as well as Unix pattern matching.
ignored-modules=
# Show a hint with possible names when a member name was not found. The aspect
# of finding the hint is based on edit distance.
missing-member-hint=yes
# The minimum edit distance a name should have in order to be considered a
# similar match for a missing member name.
missing-member-hint-distance=1
# The total number of similar names that should be taken in consideration when
# showing a hint for a missing member.
missing-member-max-choices=1
# List of decorators that change the signature of a decorated function.
signature-mutators=
[SPELLING]
# Limits count of emitted suggestions for spelling mistakes.
max-spelling-suggestions=4
# Spelling dictionary name. Available dictionaries: none. To make it work,
# install the python-enchant package.
spelling-dict=
# List of comma separated words that should not be checked.
spelling-ignore-words=
# A path to a file that contains the private dictionary; one word per line.
spelling-private-dict-file=
# Tells whether to store unknown words to the private dictionary (see the
# --spelling-private-dict-file option) instead of raising a message.
spelling-store-unknown-words=no
[LOGGING]
# The type of string formatting that logging methods do. `old` means using %
# formatting, `new` is for `{}` formatting.
logging-format-style=old
# Logging modules to check that the string format arguments are in logging
# function parameter format.
logging-modules=logging
[VARIABLES]
# List of additional names supposed to be defined in builtins. Remember that
# you should avoid defining new builtins when possible.
additional-builtins=
# Tells whether unused global variables should be treated as a violation.
allow-global-unused-variables=yes
# List of strings which can identify a callback function by name. A callback
# name must start or end with one of those strings.
callbacks=cb_,
          _cb
# A regular expression matching the name of dummy variables (i.e. expected to
# not be used).
dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
# Argument names that match this expression will be ignored. Default to name
# with leading underscore.
ignored-argument-names=_.*|^ignored_|^unused_
# Tells whether we should check for unused import in __init__ files.
init-import=no
# List of qualified module names which can have objects that can redefine
# builtins.
redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
[FORMAT]
# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
expected-line-ending-format=
# Regexp for a line that is allowed to be longer than the limit.
ignore-long-lines=^\s*(# )?<?https?://\S+>?$
# Number of spaces of indent required inside a hanging or continued line.
indent-after-paren=4
# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
# tab).
indent-string=' '
# Maximum number of characters on a single line.
max-line-length=100
# Maximum number of lines in a module.
max-module-lines=1000
# Allow the body of a class to be on the same line as the declaration if body
# contains single statement.
single-line-class-stmt=no
# Allow the body of an if to be on the same line as the test if there is no
# else.
single-line-if-stmt=no
[STRING]
# This flag controls whether inconsistent-quotes generates a warning when the
# character used as a quote delimiter is used inconsistently within a module.
check-quote-consistency=no
# This flag controls whether the implicit-str-concat should generate a warning
# on implicit string concatenation in sequences defined over several lines.
check-str-concat-over-line-jumps=no
[SIMILARITIES]
# Ignore comments when computing similarities.
ignore-comments=yes
# Ignore docstrings when computing similarities.
ignore-docstrings=yes
# Ignore imports when computing similarities.
ignore-imports=no
# Minimum lines number of a similarity.
min-similarity-lines=4
[MISCELLANEOUS]
# List of note tags to take in consideration, separated by a comma.
notes=FIXME,
      XXX,
      TODO
# Regular expression of note tags to take in consideration.
#notes-rgx=
[BASIC]
# Naming style matching correct argument names.
argument-naming-style=snake_case
# Regular expression matching correct argument names. Overrides argument-
# naming-style.
#argument-rgx=
# Naming style matching correct attribute names.
attr-naming-style=snake_case
# Regular expression matching correct attribute names. Overrides attr-naming-
# style.
#attr-rgx=
# Bad variable names which should always be refused, separated by a comma.
bad-names=foo,
          bar,
          baz,
          toto,
          tutu,
          tata
# Bad variable names regexes, separated by a comma. If names match any regex,
# they will always be refused
bad-names-rgxs=
# Naming style matching correct class attribute names.
class-attribute-naming-style=any
# Regular expression matching correct class attribute names. Overrides class-
# attribute-naming-style.
#class-attribute-rgx=
# Naming style matching correct class names.
class-naming-style=PascalCase
# Regular expression matching correct class names. Overrides class-naming-
# style.
#class-rgx=
# Naming style matching correct constant names.
const-naming-style=UPPER_CASE
# Regular expression matching correct constant names. Overrides const-naming-
# style.
#const-rgx=
# Minimum line length for functions/classes that require docstrings, shorter
# ones are exempt.
docstring-min-length=-1
# Naming style matching correct function names.
function-naming-style=snake_case
# Regular expression matching correct function names. Overrides function-
# naming-style.
#function-rgx=
# Good variable names which should always be accepted, separated by a comma.
good-names=i,
           j,
           k,
           ex,
           Run,
           _,
           x,
           y,
           w,
           h,
           a,
           b
# Good variable names regexes, separated by a comma. If names match any regex,
# they will always be accepted
good-names-rgxs=
# Include a hint for the correct naming format with invalid-name.
include-naming-hint=no
# Naming style matching correct inline iteration names.
inlinevar-naming-style=any
# Regular expression matching correct inline iteration names. Overrides
# inlinevar-naming-style.
#inlinevar-rgx=
# Naming style matching correct method names.
method-naming-style=snake_case
# Regular expression matching correct method names. Overrides method-naming-
# style.
#method-rgx=
# Naming style matching correct module names.
module-naming-style=snake_case
# Regular expression matching correct module names. Overrides module-naming-
# style.
#module-rgx=
# Colon-delimited sets of names that determine each other's naming style when
# the name regexes allow several styles.
name-group=
# Regular expression which should only match function or class names that do
# not require a docstring.
no-docstring-rgx=^_
# List of decorators that produce properties, such as abc.abstractproperty. Add
# to this list to register other decorators that produce valid properties.
# These decorators are taken in consideration only for invalid-name.
property-classes=abc.abstractproperty
# Naming style matching correct variable names.
variable-naming-style=snake_case
# Regular expression matching correct variable names. Overrides variable-
# naming-style.
#variable-rgx=
[DESIGN]
# Maximum number of arguments for function / method.
max-args=5
# Maximum number of attributes for a class (see R0902).
max-attributes=7
# Maximum number of boolean expressions in an if statement (see R0916).
max-bool-expr=5
# Maximum number of branch for function / method body.
max-branches=12
# Maximum number of locals for function / method body.
max-locals=15
# Maximum number of parents for a class (see R0901).
max-parents=7
# Maximum number of public methods for a class (see R0904).
max-public-methods=20
# Maximum number of return / yield for function / method body.
max-returns=6
# Maximum number of statements in function / method body.
max-statements=50
# Minimum number of public methods for a class (see R0903).
min-public-methods=2
[IMPORTS]
# List of modules that can be imported at any level, not just the top level
# one.
allow-any-import-level=
# Allow wildcard imports from modules that define __all__.
allow-wildcard-with-all=no
# Analyse import fallback blocks. This can be used to support both Python 2 and
# 3 compatible code, which means that the block might have code that exists
# only in one or another interpreter, leading to false positives when analysed.
analyse-fallback-blocks=no
# Deprecated modules which should not be used, separated by a comma.
deprecated-modules=optparse,tkinter.tix
# Create a graph of external dependencies in the given file (report RP0402 must
# not be disabled).
ext-import-graph=
# Create a graph of every (i.e. internal and external) dependencies in the
# given file (report RP0402 must not be disabled).
import-graph=
# Create a graph of internal dependencies in the given file (report RP0402 must
# not be disabled).
int-import-graph=
# Force import order to recognize a module as part of the standard
# compatibility libraries.
known-standard-library=
# Force import order to recognize a module as part of a third party library.
known-third-party=enchant
# Couples of modules and preferred modules, separated by a comma.
preferred-modules=
[CLASSES]
# List of method names used to declare (i.e. assign) instance attributes.
defining-attr-methods=__init__,
                      __new__,
                      setUp,
                      __post_init__
# List of member names, which should be excluded from the protected access
# warning.
exclude-protected=_asdict,
                  _fields,
                  _replace,
                  _source,
                  _make
# List of valid names for the first argument in a class method.
valid-classmethod-first-arg=cls
# List of valid names for the first argument in a metaclass class method.
valid-metaclass-classmethod-first-arg=cls
[EXCEPTIONS]
# Exceptions that will emit a warning when being caught. Defaults to
# "BaseException, Exception".
overgeneral-exceptions=BaseException,
                       Exception

7
.readthedocs.yml 100644

@@ -0,0 +1,7 @@
version: 2
python:
  version: 3.7
  install:
    - requirements: requirements/docs.txt
    - requirements: requirements/readthedocs.txt

203
LICENSE 100644

@@ -0,0 +1,203 @@
Copyright (c) MMOCR Authors. All rights reserved.
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2021 MMOCR Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

README.md

@@ -1,17 +1,17 @@
 <div align="center">
-<img src="resources/mmocr-logo.jpg" width="500px"/>
+<img src="resources/mmocr-logo.png" width="500px"/>
 </div>
 ## Introduction
-[![build](https://github.com/open-mmlab/mmediting/workflows/build/badge.svg)](https://github.com/open-mmlab/mmediting/actions)
-[![docs](https://readthedocs.org/projects/mmediting/badge/?version=latest)](https://mmediting.readthedocs.io/en/latest/?badge=latest)
-[![codecov](https://codecov.io/gh/open-mmlab/mmediting/branch/master/graph/badge.svg)](https://codecov.io/gh/open-mmlab/mmediting)
-[![license](https://img.shields.io/github/license/open-mmlab/mmediting.svg)](https://github.com/open-mmlab/mmediting/blob/master/LICENSE)
+[![build](https://github.com/open-mmlab/mmocr/workflows/build/badge.svg)](https://github.com/open-mmlab/mmocr/actions)
+[![docs](https://readthedocs.org/projects/mmocr/badge/?version=latest)](https://mmocr.readthedocs.io/en/latest/?badge=latest)
+[![codecov](https://codecov.io/gh/open-mmlab/mmocr/branch/master/graph/badge.svg)](https://codecov.io/gh/open-mmlab/mmocr)
+[![license](https://img.shields.io/github/license/open-mmlab/mmocr.svg)](https://github.com/open-mmlab/mmocr/blob/master/LICENSE)
 MMOCR is an open-source toolbox based on PyTorch and mmdetection for text detection, text recognition, and the corresponding downstream tasks including key information extraction. It is part of the open-mmlab project developed by [Multimedia Laboratory, CUHK](http://mmlab.ie.cuhk.edu.hk/).
-The master branch works with **PyTorch 1.5**.
+The master branch works with **PyTorch 1.5+**.
 Documentation: https://mmocr.readthedocs.io/en/latest/.
@@ -31,7 +31,7 @@ Documentation: https://mmocr.readthedocs.io/en/latest/.
 - **Modular Design**
-The modular design of MMOCR enables users to define their own optimizers, data preprocessors, and model components such as backbones, necks and heads as well as losses. Please refer to [GETTING_STARTED.md](docs/GETTING_STARTED.md) for how to construct a customized model.
+The modular design of MMOCR enables users to define their own optimizers, data preprocessors, and model components such as backbones, necks and heads as well as losses. Please refer to [getting_started.md](docs/getting_started.md) for how to construct a customized model.
 - **Numerous Utilities**
@@ -43,24 +43,24 @@ This project is released under the [Apache 2.0 license](LICENSE).
 ## Changelog
-v1.0 was released on 31/03/2021.
+v1.0 was released on 07/04/2021.
 ## Benchmark and Model Zoo
-Please refer to [MODEL_ZOO.md](MODEL_ZOO.md) for more details.
+Please refer to [modelzoo.md](modelzoo.md) for more details.
 ## Installation
-Please refer to [INSTALL.md](docs/INSTALL.md) for installation.
+Please refer to [install.md](docs/install.md) for installation.
 ## Get Started
-Please see [GETTING_STARTED.md](docs/GETTING_STARTED.md) for the basic usage of MMOCR.
+Please see [getting_started.md](docs/getting_started.md) for the basic usage of MMOCR.
 ## Contributing
-We appreciate all contributions to improve MMOCR. Please refer to [CONTRIBUTING.md](docs/CONTRIBUTING.md) for the contributing guidelines.
+We appreciate all contributions to improve MMOCR. Please refer to [contributing.md](docs/contributing.md) for the contributing guidelines.
 ## Acknowledgement


@@ -0,0 +1,14 @@
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=5,
hooks=[
dict(type='TextLoggerHook')
])
# yapf:enable
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
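
Runtime configs like the one above are consumed through mmcv's `Config` API; a minimal sketch of loading and inspecting one (the file path is hypothetical, since this extract does not preserve the file name):

```python
from mmcv import Config

# Hypothetical path; the diff above does not record where this file lives.
cfg = Config.fromfile('configs/_base_/default_runtime.py')

# Keys defined in the file become attributes on the loaded config.
print(cfg.log_level)          # 'INFO'
print(cfg.checkpoint_config)  # {'interval': 1}
print(cfg.log_config.hooks)   # [{'type': 'TextLoggerHook'}]
```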


@@ -0,0 +1,97 @@
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_cfg = None
test_cfg = None
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='ScaleAspectJitter',
img_scale=[(3000, 640)],
ratio_range=(0.7, 1.3),
aspect_ratio_range=(0.9, 1.1),
multiscale_mode='value',
keep_ratio=False),
# shrink_ratio is from big to small. The 1st must be 1.0
dict(type='PANetTargets', shrink_ratio=(1.0, 0.7)),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='RandomRotateTextDet'),
dict(
type='RandomCropInstances',
target_size=(640, 640),
instance_key='gt_kernels'),
dict(type='Pad', size_divisor=32),
dict(
type='CustomFormatBundle',
keys=['gt_kernels', 'gt_mask'],
visualize=dict(flag=False, boundary_key='gt_kernels')),
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(3000, 640),
flip=False,
transforms=[
dict(type='Resize', img_scale=(3000, 640), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
dataset_type = 'TextDetDataset'
img_prefix = 'tests/data/toy_dataset/imgs'
train_anno_file = 'tests/data/toy_dataset/instances_test.txt'
train1 = dict(
type=dataset_type,
img_prefix=img_prefix,
ann_file=train_anno_file,
loader=dict(
type='HardDiskLoader',
repeat=4,
parser=dict(
type='LineJsonParser',
keys=['file_name', 'height', 'width', 'annotations'])),
pipeline=train_pipeline,
test_mode=False)
data_root = 'tests/data/toy_dataset'
train2 = dict(
type='IcdarDataset',
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
pipeline=train_pipeline)
test_anno_file = 'tests/data/toy_dataset/instances_test.txt'
test = dict(
type=dataset_type,
img_prefix=img_prefix,
ann_file=test_anno_file,
loader=dict(
type='HardDiskLoader',
repeat=1,
parser=dict(
type='LineJsonParser',
keys=['file_name', 'height', 'width', 'annotations'])),
pipeline=test_pipeline,
test_mode=True)
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(type='ConcatDataset', datasets=[train1, train2]),
val=dict(type='ConcatDataset', datasets=[test]),
test=dict(type='ConcatDataset', datasets=[test]))
evaluation = dict(interval=1, metric='hmean-iou')
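
A sketch of how a dataset config like this is typically consumed, assuming mmocr's dataset and loader classes register themselves into mmdet's registries on import and that the toy data under tests/data is checked out (the config path is hypothetical):

```python
from mmcv import Config
from mmdet.datasets import build_dataset

import mmocr.datasets  # noqa: F401 -- registers TextDetDataset, IcdarDataset, HardDiskLoader, ...

cfg = Config.fromfile('configs/_base_/det_toy_dataset.py')  # hypothetical path

# data.train is a ConcatDataset wrapping train1 (line-json annotations,
# repeated 4x by the loader) and train2 (a COCO-style IcdarDataset).
train_set = build_dataset(cfg.data.train)
print(len(train_set))
```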


@@ -0,0 +1,126 @@
# model settings
model = dict(
type='OCRMaskRCNN',
pretrained='torchvision://resnet50',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[4],
ratios=[0.17, 0.44, 1.13, 2.90, 7.46],
strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
roi_head=dict(
type='StandardRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=1,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
mask_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
mask_head=dict(
type='FCNMaskHead',
num_convs=4,
in_channels=256,
conv_out_channels=256,
num_classes=1,
loss_mask=dict(
type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
# model training and testing settings
train_cfg=dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
match_low_quality=True,
ignore_iof_thr=-1,
gpu_assign_thr=50),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='OHEMSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False)),
test_cfg=dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.5),
max_per_img=100,
mask_thr_binary=0.5)))
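
A sketch of instantiating this detector, assuming `OCRMaskRCNN` is registered with mmdet's `DETECTORS` registry when mmocr is imported (the config path is hypothetical):

```python
from mmcv import Config
from mmdet.models import build_detector

import mmocr.models  # noqa: F401 -- assumed to register OCRMaskRCNN with mmdet

cfg = Config.fromfile('configs/_base_/ocr_mask_rcnn_r50_fpn_ohem.py')  # hypothetical path

# train_cfg/test_cfg are nested inside the model dict (mmdet 2.x style),
# so building from cfg.model alone is enough.
detector = build_detector(cfg.model)
print(type(detector).__name__)  # 'OCRMaskRCNN'
```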


@@ -0,0 +1,126 @@
# model settings
model = dict(
type='OCRMaskRCNN',
text_repr_type='poly',
pretrained='torchvision://resnet50',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[4],
ratios=[0.17, 0.44, 1.13, 2.90, 7.46],
strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
roi_head=dict(
type='StandardRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=7, sample_num=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
mask_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=14, sample_num=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
mask_head=dict(
type='FCNMaskHead',
num_convs=4,
in_channels=256,
conv_out_channels=256,
num_classes=80,
loss_mask=dict(
type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
# model training and testing settings
train_cfg=dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=True,
ignore_iof_thr=-1,
gpu_assign_thr=50),
sampler=dict(
type='OHEMSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False)),
test_cfg=dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.5),
max_per_img=100,
mask_thr_binary=0.5)))


@@ -0,0 +1,96 @@
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
gt_label_convertor = dict(
type='SegConvertor', dict_type='DICT36', with_unknown=True, lower=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomPaddingOCR',
max_ratio=[0.15, 0.2, 0.15, 0.2],
box_type='char_quads'),
dict(type='OpencvToPil'),
dict(
type='RandomRotateImageBox',
min_angle=-17,
max_angle=17,
box_type='char_quads'),
dict(type='PilToOpencv'),
dict(
type='ResizeOCR',
height=64,
min_width=64,
max_width=512,
keep_aspect_ratio=True),
dict(
type='OCRSegTargets',
label_convertor=gt_label_convertor,
box_type='char_quads'),
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
dict(type='ToTensorOCR'),
dict(type='FancyPCA'),
dict(type='NormalizeOCR', **img_norm_cfg),
dict(
type='CustomFormatBundle',
keys=['gt_kernels'],
visualize=dict(flag=False, boundary_key=None),
call_super=False),
dict(
type='Collect',
keys=['img', 'gt_kernels'],
meta_keys=['filename', 'ori_shape', 'img_shape'])
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeOCR',
height=64,
min_width=64,
max_width=None,
keep_aspect_ratio=True),
dict(type='ToTensorOCR'),
dict(type='NormalizeOCR', **img_norm_cfg),
dict(type='CustomFormatBundle', call_super=False),
dict(
type='Collect',
keys=['img'],
meta_keys=['filename', 'ori_shape', 'img_shape'])
]
prefix = 'tests/data/ocr_char_ann_toy_dataset/'
train = dict(
type='OCRSegDataset',
img_prefix=prefix + 'imgs',
ann_file=prefix + 'instances_train.txt',
loader=dict(
type='HardDiskLoader',
repeat=100,
parser=dict(
type='LineJsonParser', keys=['file_name', 'annotations', 'text'])),
pipeline=train_pipeline,
test_mode=True)
test = dict(
type='OCRDataset',
img_prefix=prefix + 'imgs',
ann_file=prefix + 'instances_test.txt',
loader=dict(
type='HardDiskLoader',
repeat=1,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=test_pipeline,
test_mode=True)
data = dict(
samples_per_gpu=8,
workers_per_gpu=1,
train=dict(type='ConcatDataset', datasets=[train]),
val=dict(type='ConcatDataset', datasets=[test]),
test=dict(type='ConcatDataset', datasets=[test]))
evaluation = dict(interval=1, metric='acc')


@@ -0,0 +1,99 @@
img_norm_cfg = dict(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeOCR',
height=32,
min_width=32,
max_width=160,
keep_aspect_ratio=True),
dict(type='ToTensorOCR'),
dict(type='NormalizeOCR', **img_norm_cfg),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'filename', 'ori_shape', 'img_shape', 'text', 'valid_ratio'
]),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiRotateAugOCR',
rotate_degrees=[0, 90, 270],
transforms=[
dict(
type='ResizeOCR',
height=32,
min_width=32,
max_width=160,
keep_aspect_ratio=True),
dict(type='ToTensorOCR'),
dict(type='NormalizeOCR', **img_norm_cfg),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'filename', 'ori_shape', 'img_shape', 'valid_ratio'
]),
])
]
dataset_type = 'OCRDataset'
img_prefix = 'tests/data/ocr_toy_dataset/imgs'
train_anno_file1 = 'tests/data/ocr_toy_dataset/label.txt'
train1 = dict(
type=dataset_type,
img_prefix=img_prefix,
ann_file=train_anno_file1,
loader=dict(
type='HardDiskLoader',
repeat=100,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=train_pipeline,
test_mode=False)
train_anno_file2 = 'tests/data/ocr_toy_dataset/label.lmdb'
train2 = dict(
type=dataset_type,
img_prefix=img_prefix,
ann_file=train_anno_file2,
loader=dict(
type='LmdbLoader',
repeat=100,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=train_pipeline,
test_mode=False)
test_anno_file1 = 'tests/data/ocr_toy_dataset/label.lmdb'
test = dict(
type=dataset_type,
img_prefix=img_prefix,
ann_file=test_anno_file1,
loader=dict(
type='LmdbLoader',
repeat=1,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=test_pipeline,
test_mode=True)
data = dict(
samples_per_gpu=16,
workers_per_gpu=2,
train=dict(type='ConcatDataset', datasets=[train1, train2]),
val=dict(type='ConcatDataset', datasets=[test]),
test=dict(type='ConcatDataset', datasets=[test]))
evaluation = dict(interval=1, metric='acc')
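
The `LineStrParser` settings above (`keys`, `keys_idx`, `separator`) describe annotation lines of the form `<filename> <text>`; a standalone sketch of that parsing logic (illustrative, not mmocr's actual implementation):

```python
def parse_line(line, keys=('filename', 'text'), keys_idx=(0, 1), separator=' '):
    """Split an annotation line and map the selected columns onto keys."""
    parts = line.strip().split(separator)
    return {key: parts[idx] for key, idx in zip(keys, keys_idx)}

print(parse_line('img_1.jpg Hello'))
# {'filename': 'img_1.jpg', 'text': 'Hello'}
```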


@@ -0,0 +1,11 @@
label_convertor = dict(
type='CTCConvertor', dict_type='DICT90', with_unknown=False)
model = dict(
type='CRNNNet',
preprocessor=None,
backbone=dict(type='VeryDeepVgg', leakyRelu=False),
encoder=None,
decoder=dict(type='CRNNDecoder', in_channels=512, rnn_flag=True),
loss=dict(type='CTCLoss', flatten=False),
label_convertor=label_convertor)


@@ -0,0 +1,11 @@
label_convertor = dict(
type='AttnConvertor', dict_type='DICT36', with_unknown=True, lower=True)
model = dict(
type='NRTR',
backbone=dict(type='NRTRModalityTransform'),
encoder=dict(type='TFEncoder'),
decoder=dict(type='TFDecoder'),
loss=dict(type='TFLoss'),
label_convertor=label_convertor,
max_seq_len=40)


@@ -0,0 +1,24 @@
label_convertor = dict(
type='AttnConvertor', dict_type='DICT90', with_unknown=True)
hybrid_decoder = dict(type='SequenceAttentionDecoder')
position_decoder = dict(type='PositionAttentionDecoder')
model = dict(
type='RobustScanner',
backbone=dict(type='ResNet31OCR'),
encoder=dict(
type='ChannelReductionEncoder',
in_channels=512,
out_channels=128,
),
decoder=dict(
type='RobustScannerDecoder',
dim_input=512,
dim_model=128,
hybrid_decoder=hybrid_decoder,
position_decoder=position_decoder),
loss=dict(type='SARLoss'),
label_convertor=label_convertor,
max_seq_len=30)


@@ -0,0 +1,24 @@
label_convertor = dict(
type='AttnConvertor', dict_type='DICT90', with_unknown=True)
model = dict(
type='SARNet',
backbone=dict(type='ResNet31OCR'),
encoder=dict(
type='SAREncoder',
enc_bi_rnn=False,
enc_do_rnn=0.1,
enc_gru=False,
),
decoder=dict(
type='ParallelSARDecoder',
enc_bi_rnn=False,
dec_bi_rnn=False,
dec_do_rnn=0,
dec_gru=False,
pred_dropout=0.1,
d_k=512,
pred_concat=True),
loss=dict(type='SARLoss'),
label_convertor=label_convertor,
max_seq_len=30)


@@ -0,0 +1,11 @@
label_convertor = dict(
type='AttnConvertor', dict_type='DICT90', with_unknown=False)
model = dict(
type='TransformerNet',
backbone=dict(type='ResNet31OCR'),
encoder=dict(type='TFEncoder'),
decoder=dict(type='TFDecoder'),
loss=dict(type='TFLoss'),
label_convertor=label_convertor,
max_seq_len=40)


@@ -0,0 +1,14 @@
checkpoint_config = dict(interval=10)
# yapf:disable
log_config = dict(
interval=5,
hooks=[
dict(type='TextLoggerHook')
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]


@@ -0,0 +1,6 @@
# optimizer
optimizer = dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='poly', power=0.9, min_lr=1e-7, by_epoch=True)
total_epochs = 1200
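
For intuition, mmcv's `poly` policy decays the rate as `(base_lr - min_lr) * (1 - progress)**power + min_lr`, with progress measured in epochs here because `by_epoch=True`; a quick sketch of the resulting curve:

```python
def poly_lr(epoch, base_lr=0.007, min_lr=1e-7, power=0.9, total_epochs=1200):
    """Polynomial decay matching the lr_config above."""
    progress = min(epoch / total_epochs, 1.0)
    return (base_lr - min_lr) * (1 - progress) ** power + min_lr

for epoch in (0, 300, 600, 900, 1200):
    print(epoch, f'{poly_lr(epoch):.6f}')  # decays from 0.007 toward 1e-7
```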


@@ -0,0 +1,11 @@
# optimizer
optimizer = dict(type='SGD', lr=0.08, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[80, 128])
total_epochs = 160
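
Here the first 500 iterations ramp the rate linearly from `warmup_ratio * lr` up to `lr`, after which the `step` policy divides it by 10 at epochs 80 and 128 (mmcv's default `gamma`); a sketch of both pieces:

```python
def step_lr(epoch, base_lr=0.08, milestones=(80, 128), gamma=0.1):
    """Step policy: multiply by gamma at each milestone epoch."""
    return base_lr * gamma ** sum(epoch >= m for m in milestones)

def linear_warmup(cur_iter, regular_lr, warmup_iters=500, warmup_ratio=0.001):
    """Linear warmup as mmcv computes it."""
    k = (1 - cur_iter / warmup_iters) * (1 - warmup_ratio)
    return regular_lr * (1 - k)

print(linear_warmup(0, step_lr(0)))                      # 8e-05 at the first iteration
print(linear_warmup(500, step_lr(0)))                    # 0.08 once warmup finishes
print(round(step_lr(80), 6), round(step_lr(128), 6))     # 0.008, then 0.0008
```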


@@ -0,0 +1,11 @@
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[8, 11])
total_epochs = 12


@@ -0,0 +1,4 @@
_base_ = './schedule_1x.py'
# learning policy
lr_config = dict(step=[16, 19])
total_epochs = 20
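
`_base_` is mmcv's config inheritance: `./schedule_1x.py` is loaded first and the keys defined here override it, with dict values merged key-by-key; a sketch of the merged result (the file path below is hypothetical):

```python
from mmcv import Config

# './schedule_1x.py' is resolved relative to this config file.
cfg = Config.fromfile('configs/_base_/schedules/schedule_20e.py')  # hypothetical path

print(cfg.total_epochs)      # 20 -- overridden in this file
print(cfg.lr_config.step)    # [16, 19] -- overridden in this file
print(cfg.lr_config.warmup)  # 'linear' -- inherited from schedule_1x.py
print(cfg.optimizer.lr)      # 0.02 -- inherited from schedule_1x.py
```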


@@ -0,0 +1,6 @@
# optimizer
optimizer = dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=True)
total_epochs = 2


@@ -0,0 +1,4 @@
_base_ = './schedule_1x.py'
# learning policy
lr_config = dict(step=[16, 22])
total_epochs = 24


@@ -0,0 +1,6 @@
# optimizer
optimizer = dict(type='Adadelta', lr=1.0)
optimizer_config = dict(grad_clip=dict(max_norm=0.5))
# learning policy
lr_config = dict(policy='step', step=[8, 10, 12])
total_epochs = 16


@@ -0,0 +1,6 @@
# optimizer
optimizer = dict(type='Adadelta', lr=1.0)
optimizer_config = dict(grad_clip=dict(max_norm=0.5))
# learning policy
lr_config = dict(policy='step', step=[4, 6, 7])
total_epochs = 8

View File

@ -0,0 +1,6 @@
# optimizer
optimizer = dict(type='Adam', lr=1e-3)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='poly', power=0.9)
total_epochs = 1

View File

@ -0,0 +1,6 @@
# optimizer
optimizer = dict(type='Adam', lr=1e-3)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='poly', power=0.9)
total_epochs = 600

View File

@ -0,0 +1,6 @@
# optimizer
optimizer = dict(type='SGD', lr=1e-3, momentum=0.99, weight_decay=5e-4)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='step', step=[200, 400])
total_epochs = 600

View File

@ -0,0 +1,25 @@
# Spatial Dual-Modality Graph Reasoning for Key Information Extraction
## Introduction
[ALGORITHM]
```bibtex
@misc{sun2021spatial,
title={Spatial Dual-Modality Graph Reasoning for Key Information Extraction},
author={Hongbin Sun and Zhanghui Kuang and Xiaoyu Yue and Chenhao Lin and Wayne Zhang},
year={2021},
eprint={2103.14470},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
## Results and models
### WildReceipt
| Method | Modality | Macro F1-Score | Download |
| :--------------------------------------------------------------------: | :--------------: | :------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| [sdmgr_unet16](/configs/kie/sdmgr/sdmgr_unet16_60e_wildreceipt.py) | Visual + Textual | 0.876 | [model](https://download.openmmlab.com/mmocr/kie/sdmgr/sdmgr_unet16_60e_wildreceipt_20210405-16a47642.pth) \| [log](https://download.openmmlab.com/mmocr/kie/sdmgr/20210405_104508.log.json) |
| [sdmgr_novisual](/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt.py) | Textual | 0.864 | [model](https://download.openmmlab.com/mmocr/kie/sdmgr/sdmgr_novisual_60e_wildreceipt_20210405-07bc26ad.pth) \| [log](https://download.openmmlab.com/mmocr/kie/sdmgr/20210405_141138.log.json) |
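As a quick orientation (not part of this change), a config like the ones above is typically materialized with mmcv's `Config` and MMOCR's detector builder; the exact import paths below are assumptions based on OpenMMLab conventions rather than taken from this diff:
```python
# Minimal sketch, assuming mmcv and mmocr are installed and the working
# directory matches the repo layout used by the config paths above.
from mmcv import Config
from mmocr.models import build_detector  # assumed export

cfg = Config.fromfile('configs/kie/sdmgr/sdmgr_unet16_60e_wildreceipt.py')
# In these configs, train_cfg/test_cfg live inside cfg.model itself.
model = build_detector(cfg.model)
print(type(model).__name__)  # expected: SDMGR
```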

View File

@ -0,0 +1,93 @@
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
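# The mean/std above are the standard ImageNet statistics on a 0-255 scale.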
max_scale, min_scale = 1024, 512
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(type='Resize', img_scale=(max_scale, min_scale), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='KIEFormatBundle'),
dict(
type='Collect',
keys=['img', 'relations', 'texts', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(type='Resize', img_scale=(max_scale, min_scale), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='KIEFormatBundle'),
dict(type='Collect', keys=['img', 'relations', 'texts', 'gt_bboxes'])
]
dataset_type = 'KIEDataset'
data_root = 'data/wildreceipt'
loader = dict(
type='HardDiskLoader',
repeat=1,
parser=dict(
type='LineJsonParser',
keys=['file_name', 'height', 'width', 'annotations']))
train = dict(
type=dataset_type,
ann_file=f'{data_root}/train.txt',
pipeline=train_pipeline,
img_prefix=data_root,
loader=loader,
dict_file=f'{data_root}/dict.txt',
test_mode=False)
test = dict(
type=dataset_type,
ann_file=f'{data_root}/test.txt',
pipeline=test_pipeline,
img_prefix=data_root,
loader=loader,
dict_file=f'{data_root}/dict.txt',
test_mode=True)
data = dict(
samples_per_gpu=4, workers_per_gpu=0, train=train, val=test, test=test)
evaluation = dict(
interval=1,
metric='macro_f1',
metric_options=dict(
macro_f1=dict(
ignores=[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 25])))
model = dict(
type='SDMGR',
backbone=dict(type='UNet', base_channels=16),
bbox_head=dict(
type='SDMGRHead', visual_dim=16, num_chars=92, num_classes=26),
visual_modality=False,
train_cfg=None,
test_cfg=None,
class_list=f'{data_root}/class_list.txt')
optimizer = dict(type='Adam', weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
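# With warmup_iters=1 and warmup_ratio=1, the linear warmup below is
# effectively a no-op.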
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1,
warmup_ratio=1,
step=[40, 50])
total_epochs = 60
checkpoint_config = dict(interval=1)
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
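# Lets DDP tolerate parameters that receive no gradients, e.g. the unused
# visual branch in this no-visual variant (assumption).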
find_unused_parameters = True

View File

@ -0,0 +1,93 @@
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
max_scale, min_scale = 1024, 512
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(type='Resize', img_scale=(max_scale, min_scale), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='KIEFormatBundle'),
dict(
type='Collect',
keys=['img', 'relations', 'texts', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(type='Resize', img_scale=(max_scale, min_scale), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='KIEFormatBundle'),
dict(type='Collect', keys=['img', 'relations', 'texts', 'gt_bboxes'])
]
dataset_type = 'KIEDataset'
data_root = 'data/wildreceipt'
loader = dict(
type='HardDiskLoader',
repeat=1,
parser=dict(
type='LineJsonParser',
keys=['file_name', 'height', 'width', 'annotations']))
train = dict(
type=dataset_type,
ann_file=f'{data_root}/train.txt',
pipeline=train_pipeline,
img_prefix=data_root,
loader=loader,
dict_file=f'{data_root}/dict.txt',
test_mode=False)
test = dict(
type=dataset_type,
ann_file=f'{data_root}/test.txt',
pipeline=test_pipeline,
img_prefix=data_root,
loader=loader,
dict_file=f'{data_root}/dict.txt',
test_mode=True)
data = dict(
samples_per_gpu=4, workers_per_gpu=0, train=train, val=test, test=test)
evaluation = dict(
interval=1,
metric='macro_f1',
metric_options=dict(
macro_f1=dict(
ignores=[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 25])))
model = dict(
type='SDMGR',
backbone=dict(type='UNet', base_channels=16),
bbox_head=dict(
type='SDMGRHead', visual_dim=16, num_chars=92, num_classes=26),
visual_modality=True,
train_cfg=None,
test_cfg=None,
class_list=f'{data_root}/class_list.txt')
optimizer = dict(type='Adam', weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1,
warmup_ratio=1,
step=[40, 50])
total_epochs = 60
checkpoint_config = dict(interval=1)
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
find_unused_parameters = True

View File

@ -0,0 +1,28 @@
# Real-time Scene Text Detection with Differentiable Binarization
## Introduction
[ALGORITHM]
```bibtex
@article{Liao_Wan_Yao_Chen_Bai_2020,
title={Real-Time Scene Text Detection with Differentiable Binarization},
journal={Proceedings of the AAAI Conference on Artificial Intelligence},
author={Liao, Minghui and Wan, Zhaoyi and Yao, Cong and Chen, Kai and Bai, Xiang},
year={2020},
pages={11474-11481}}
```
## Results and models
### ICDAR2015
| Method | Pretrained Model | Training set | Test set | #epochs | Test size | Recall | Precision | Hmean | Download |
| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| [DBNet](/configs/textdet/dbnet/dbnet_r18_fpnc_1200e_icdar2015.py) | ImageNet | ICDAR2015 Train | ICDAR2015 Test | 1200 | 736 | 0.731 | 0.871 | 0.795 | [model](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_r18_fpnc_sbn_1200e_icdar2015_20210329-ba3ab597.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_r18_fpnc_sbn_1200e_icdar2015_20210329-ba3ab597.log.json) |
| [DBNet](/configs/textdet/dbnet/dbnet_r50dcnv2_fpnc_1200e_icdar2015.py) | [Synthtext](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_r50dcnv2_fpnc_sbn_2e_synthtext_20210325-aa96e477.pth) | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.796 | 0.866 | 0.830 | [model](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_r50dcnv2_fpnc_sbn_1200e_icdar2015_20210325-91cef9af.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_r50dcnv2_fpnc_sbn_1200e_icdar2015_20210325-91cef9af.pth.log.json) |

View File

@ -0,0 +1,96 @@
_base_ = [
'../../_base_/schedules/schedule_1200e.py', '../../_base_/runtime_10e.py'
]
model = dict(
type='DBNet',
pretrained='torchvision://resnet18',
backbone=dict(
type='ResNet',
depth=18,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=False,
style='caffe'),
neck=dict(
type='FPNC', in_channels=[64, 128, 256, 512], lateral_channels=256),
bbox_head=dict(
type='DBHead',
text_repr_type='quad',
in_channels=256,
loss=dict(type='DBLoss', alpha=5.0, beta=10.0, bbce_loss=True)),
train_cfg=None,
test_cfg=None)
dataset_type = 'IcdarDataset'
data_root = 'data/icdar2015/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# To visualize images, uncomment the img_norm_cfg below.
# img_norm_cfg = dict(mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
# img aug
dict(
type='ImgAug',
args=[['Fliplr', 0.5],
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
# random crop
dict(type='EastRandomCrop', target_size=(640, 640)),
dict(type='DBNetTargets', shrink_ratio=0.4),
dict(type='Pad', size_divisor=32),
# To visualize images and ground truths, set visualize=True below.
dict(
type='CustomFormatBundle',
keys=['gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'],
visualize=dict(flag=False, boundary_key='gt_shrink')),
dict(
type='Collect',
keys=['img', 'gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'])
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 736),
flip=False,
transforms=[
dict(type='Resize', img_scale=(2944, 736), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=16,
workers_per_gpu=8,
train=dict(
type=dataset_type,
ann_file=data_root + '/instances_training.json',
# For debugging, uncomment to keep only the first k images.
# select_first_k=200,
img_prefix=data_root + '/imgs',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
# select_first_k=100,
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
# select_first_k=100,
pipeline=test_pipeline))
evaluation = dict(interval=100, metric='hmean-iou')

View File

@ -0,0 +1,105 @@
_base_ = [
'../../_base_/schedules/schedule_1200e.py', '../../_base_/runtime_10e.py'
]
load_from = 'checkpoints/textdet/dbnet/res50dcnv2_synthtext.pth'
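# Fine-tune from the SynthText-pretrained checkpoint referenced in the README;
# the local path above is assumed to exist.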
model = dict(
type='DBNet',
pretrained='torchvision://resnet50',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=False,
style='caffe',
dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
stage_with_dcn=(False, True, True, True)),
neck=dict(
type='FPNC', in_channels=[256, 512, 1024, 2048], lateral_channels=256),
bbox_head=dict(
type='DBHead',
text_repr_type='quad',
in_channels=256,
loss=dict(type='DBLoss', alpha=5.0, beta=10.0, bbce_loss=True)),
train_cfg=None,
test_cfg=None)
dataset_type = 'IcdarDataset'
data_root = 'data/icdar2015/'
# img_norm_cfg = dict(
# mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# from official dbnet code
img_norm_cfg = dict(
mean=[122.67891434, 116.66876762, 104.00698793],
std=[255, 255, 255],
to_rgb=False)
# To visualize images, uncomment the img_norm_cfg below.
# img_norm_cfg = dict(mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
# img aug
dict(
type='ImgAug',
args=[['Fliplr', 0.5],
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
# random crop
dict(type='EastRandomCrop', target_size=(640, 640)),
dict(type='DBNetTargets', shrink_ratio=0.4),
dict(type='Pad', size_divisor=32),
# To visualize images and ground truths, set visualize=True below.
dict(
type='CustomFormatBundle',
keys=['gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'],
visualize=dict(flag=False, boundary_key='gt_shrink')),
dict(
type='Collect',
keys=['img', 'gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'])
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(4068, 1024),
flip=False,
transforms=[
dict(type='Resize', img_scale=(4068, 1024), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
train=dict(
type=dataset_type,
ann_file=data_root + '/instances_training.json',
# For debugging, uncomment to keep only the first k images.
# select_first_k=200,
img_prefix=data_root + '/imgs',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
# select_first_k=100,
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
# select_first_k=100,
pipeline=test_pipeline))
evaluation = dict(interval=100, metric='hmean-iou')

View File

@ -0,0 +1,35 @@
# Mask R-CNN
## Introduction
[ALGORITHM]
```bibtex
@article{pmtd,
author={Jingchao Liu and Xuebo Liu and Jie Sheng and Ding Liang and Xin Li and Qingjie Liu},
title={Pyramid Mask Text Detector},
journal={CoRR},
volume={abs/1903.11800},
year={2019}
}
```
## Results and models
### CTW1500
| Method | Pretrained Model | Training set | Test set | #epochs | Test size | Recall | Precision | Hmean | Download |
| :---------------------------------------------------------------------: | :--------------: | :-----------: | :----------: | :-----: | :-------: | :----: | :-------: | :---: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| [MaskRCNN](/configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_ctw1500.py) | ImageNet | CTW1500 Train | CTW1500 Test | 160 | 1600 | 0.753 | 0.712 | 0.732 | [model](https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_ctw1500_20210219-96497a76.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_ctw1500_20210219-96497a76.log.json) |
### ICDAR2015
| Method | Pretrained Model | Training set | Test set | #epochs | Test size | Recall | Precision | Hmean | Download |
| :-----------------------------------------------------------------------: | :--------------: | :-------------: | :------------: | :-----: | :-------: | :----: | :-------: | :---: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| [MaskRCNN](/configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2015.py) | ImageNet | ICDAR2015 Train | ICDAR2015 Test | 160 | 1920 | 0.783 | 0.872 | 0.825 | [model](https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2015_20210219-8eb340a3.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2015_20210219-8eb340a3.log.json) |
### ICDAR2017
| Method | Pretrained Model | Training set | Test set | #epochs | Test size | Recall | Precision | Hmean | Download |
| :-----------------------------------------------------------------------: | :--------------: | :-------------: | :-----------: | :-----: | :-------: | :----: | :-------: | :---: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| [MaskRCNN](/configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2017.py) | ImageNet | ICDAR2017 Train | ICDAR2017 Val | 160 | 1600 | 0.754 | 0.827 | 0.789 | [model](https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2017_20210218-c6ec3ebb.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2017_20210218-c6ec3ebb.log.json) |

View File

@ -0,0 +1,67 @@
_base_ = [
'../../_base_/models/ocr_mask_rcnn_r50_fpn_ohem_poly.py',
'../../_base_/schedules/schedule_160e.py', '../../_base_/runtime_10e.py'
]
dataset_type = 'IcdarDataset'
data_root = 'data/ctw1500/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# img_norm_cfg = dict(mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(
type='ScaleAspectJitter',
img_scale=None,
keep_ratio=False,
resize_type='indep_sample_in_range',
scale_range=(640, 2560)),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='RandomCropInstances',
target_size=(640, 640),
mask_type='union_all',
instance_key='gt_masks'),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
# resize the long side to 1600
img_scale=(1600, 1600),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
# no flip
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
train=dict(
type=dataset_type,
ann_file=data_root + '/instances_training.json',
img_prefix=data_root + '/imgs',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
# select_first_k=1,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
# select_first_k=1,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline))
evaluation = dict(interval=10, metric='hmean-iou')

View File

@ -0,0 +1,66 @@
_base_ = [
'../../_base_/models/ocr_mask_rcnn_r50_fpn_ohem.py',
'../../_base_/schedules/schedule_160e.py', '../../_base_/runtime_10e.py'
]
dataset_type = 'IcdarDataset'
data_root = 'data/icdar2015/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# img_norm_cfg = dict(mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(
type='ScaleAspectJitter',
img_scale=None,
keep_ratio=False,
resize_type='indep_sample_in_range',
scale_range=(640, 2560)),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='RandomCropInstances',
target_size=(640, 640),
mask_type='union_all',
instance_key='gt_masks'),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
# resize the long side to 1920
img_scale=(1920, 1920),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
# no flip
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
train=dict(
type=dataset_type,
ann_file=data_root + '/instances_training.json',
img_prefix=data_root + '/imgs',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
# select_first_k=1,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
# select_first_k=1,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline))
evaluation = dict(interval=10, metric='hmean-iou')

View File

@ -0,0 +1,67 @@
_base_ = [
'../../_base_/models/ocr_mask_rcnn_r50_fpn_ohem.py',
'../../_base_/schedules/schedule_160e.py', '../../_base_/runtime_10e.py'
]
dataset_type = 'IcdarDataset'
data_root = 'data/icdar2017/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# img_norm_cfg = dict(mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(
type='ScaleAspectJitter',
img_scale=None,
keep_ratio=False,
resize_type='indep_sample_in_range',
scale_range=(640, 2560)),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='RandomCropInstances',
target_size=(640, 640),
mask_type='union_all',
instance_key='gt_masks'),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
# resize the long side to 1600
img_scale=(1600, 1600),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
# no flip
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
train=dict(
type=dataset_type,
ann_file=data_root + '/instances_training.json',
img_prefix=data_root + '/imgs',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
# select_first_k=1,
ann_file=data_root + '/instances_val.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
# select_first_k=1,
ann_file=data_root + '/instances_val.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline))
evaluation = dict(interval=10, metric='hmean-iou')

View File

@ -0,0 +1,35 @@
# Efficient and Accurate Arbitrary-Shaped Text Detection with Pixel Aggregation Network
## Introduction
[ALGORITHM]
```bibtex
@inproceedings{WangXSZWLYS19,
author={Wenhai Wang and Enze Xie and Xiaoge Song and Yuhang Zang and Wenjia Wang and Tong Lu and Gang Yu and Chunhua Shen},
title={Efficient and Accurate Arbitrary-Shaped Text Detection With Pixel Aggregation Network},
booktitle={ICCV},
pages={8439--8448},
year={2019}
}
```
## Results and models
### CTW1500
| Method | Pretrained Model | Training set | Test set | #epochs | Test size | Recall | Precision | Hmean | Download |
| :----------------------------------------------------------------: | :--------------: | :-----------: | :----------: | :-----: | :-------: | :----: | :-------: | :---: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| [PANet](/configs/textdet/panet/panet_r18_fpem_ffm_600e_ctw1500.py) | ImageNet | CTW1500 Train | CTW1500 Test | 600 | 640 | 0.790 | 0.838 | 0.813 | [model](https://download.openmmlab.com/mmocr/textdet/panet/panet_r18_fpem_ffm_sbn_600e_ctw1500_20210219-3b3a9aa3.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/panet/panet_r18_fpem_ffm_sbn_600e_ctw1500_20210219-3b3a9aa3.log.json) |
### ICDAR2015
| Method | Pretrained Model | Training set | Test set | #epochs | Test size | Recall | Precision | Hmean | Download |
| :------------------------------------------------------------------: | :--------------: | :-------------: | :------------: | :-----: | :-------: | :----: | :-------: | :---: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| [PANet](/configs/textdet/panet/panet_r18_fpem_ffm_600e_icdar2015.py) | ImageNet | ICDAR2015 Train | ICDAR2015 Test | 600 | 736 | 0.734 | 0.856 | 0.791 | [model](https://download.openmmlab.com/mmocr/textdet/panet/panet_r18_fpem_ffm_sbn_600e_icdar2015_20210219-42dbe46a.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/panet/panet_r18_fpem_ffm_sbn_600e_icdar2015_20210219-42dbe46a.log.json) |
### ICDAR2017
| Method | Pretrained Model | Training set | Test set | #epochs | Test size | Recall | Precision | Hmean | Download |
| :------------------------------------------------------------------: | :--------------: | :-------------: | :-----------: | :-----: | :-------: | :----: | :-------: | :---: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| [PANet](/configs/textdet/panet/panet_r50_fpem_ffm_600e_icdar2017.py) | ImageNet | ICDAR2017 Train | ICDAR2017 Val | 600 | 800 | 0.604 | 0.812 | 0.693 | [model](https://download.openmmlab.com/mmocr/textdet/panet/panet_r50_fpem_ffm_sbn_600e_icdar2017_20210219-b4877a4f.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/panet/panet_r50_fpem_ffm_sbn_600e_icdar2017_20210219-b4877a4f.log.json) |

View File

@ -0,0 +1,104 @@
_base_ = [
'../../_base_/schedules/schedule_adam_600e.py',
'../../_base_/runtime_10e.py'
]
model = dict(
type='PANet',
pretrained='torchvision://resnet18',
backbone=dict(
type='ResNet',
depth=18,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='SyncBN', requires_grad=True),
norm_eval=True,
style='caffe'),
neck=dict(type='FPEM_FFM', in_channels=[64, 128, 256, 512]),
bbox_head=dict(
type='PANHead',
text_repr_type='poly',
in_channels=[128, 128, 128, 128],
out_channels=6,
loss=dict(type='PANLoss')),
train_cfg=None,
test_cfg=None)
dataset_type = 'IcdarDataset'
data_root = 'data/ctw1500/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# To visualize images, uncomment the img_norm_cfg below.
# img_norm_cfg = dict(
# mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='ScaleAspectJitter',
img_scale=[(3000, 640)],
ratio_range=(0.7, 1.3),
aspect_ratio_range=(0.9, 1.1),
multiscale_mode='value',
keep_ratio=False),
# shrink_ratio values go from largest to smallest; the first must be 1.0
dict(type='PANetTargets', shrink_ratio=(1.0, 0.7)),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='RandomRotateTextDet'),
dict(
type='RandomCropInstances',
target_size=(640, 640),
instance_key='gt_kernels'),
dict(type='Pad', size_divisor=32),
# To visualize images and ground truths, set visualize=True below.
dict(
type='CustomFormatBundle',
keys=['gt_kernels', 'gt_mask'],
visualize=dict(flag=False, boundary_key='gt_kernels')),
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(3000, 640),
flip=False,
transforms=[
dict(type='Resize', img_scale=(3000, 640), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + '/instances_training.json',
# For debugging, uncomment to keep only the first k images.
# select_first_k=200,
img_prefix=data_root + '/imgs',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
# select_first_k=100,
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
# select_first_k=100,
pipeline=test_pipeline))
evaluation = dict(interval=10, metric='hmean-iou')

View File

@ -0,0 +1,102 @@
_base_ = [
'../../_base_/schedules/schedule_adam_600e.py',
'../../_base_/runtime_10e.py'
]
model = dict(
type='PANet',
pretrained='torchvision://resnet18',
backbone=dict(
type='ResNet',
depth=18,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='SyncBN', requires_grad=True),
norm_eval=True,
style='caffe'),
neck=dict(type='FPEM_FFM', in_channels=[64, 128, 256, 512]),
bbox_head=dict(
type='PANHead',
text_repr_type='quad',
in_channels=[128, 128, 128, 128],
out_channels=6,
loss=dict(type='PANLoss')),
train_cfg=None,
test_cfg=None)
dataset_type = 'IcdarDataset'
data_root = 'data/icdar2015/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# To visualize images, uncomment the img_norm_cfg below.
# img_norm_cfg = dict(
# mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='ScaleAspectJitter',
img_scale=[(3000, 736)],
ratio_range=(0.7, 1.3),
aspect_ratio_range=(0.9, 1.1),
multiscale_mode='value',
keep_ratio=False),
dict(type='PANetTargets', shrink_ratio=(1.0, 0.5), max_shrink=20),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='RandomRotateTextDet'),
dict(
type='RandomCropInstances',
target_size=(736, 736),
instance_key='gt_kernels'),
dict(type='Pad', size_divisor=32),
# To visualize images and ground truths, set visualize=True below.
dict(
type='CustomFormatBundle',
keys=['gt_kernels', 'gt_mask'],
visualize=dict(flag=False, boundary_key='gt_kernels')),
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 736),
flip=False,
transforms=[
dict(type='Resize', img_scale=(1333, 736), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=8,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + '/instances_training.json',
# For debugging, uncomment to keep only the first k images.
# select_first_k=200,
img_prefix=data_root + '/imgs',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
# select_first_k=100,
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
# select_first_k=100,
pipeline=test_pipeline))
evaluation = dict(interval=10, metric='hmean-iou')

View File

@ -0,0 +1,93 @@
_base_ = [
'../../_base_/schedules/schedule_adam_600e.py',
'../../_base_/runtime_10e.py'
]
model = dict(
type='PANet',
pretrained='torchvision://resnet50',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='caffe'),
neck=dict(type='FPEM_FFM', in_channels=[256, 512, 1024, 2048]),
bbox_head=dict(
type='PANHead',
in_channels=[128, 128, 128, 128],
out_channels=6,
loss=dict(type='PANLoss', speedup_bbox_thr=32)),
train_cfg=None,
test_cfg=None)
dataset_type = 'IcdarDataset'
data_root = 'data/icdar2017/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='ScaleAspectJitter',
img_scale=[(3000, 800)],
ratio_range=(0.7, 1.3),
aspect_ratio_range=(0.9, 1.1),
multiscale_mode='value',
keep_ratio=False),
dict(type='PANetTargets', shrink_ratio=(1.0, 0.5)),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='RandomRotateTextDet'),
dict(
type='RandomCropInstances',
target_size=(800, 800),
instance_key='gt_kernels'),
dict(type='Pad', size_divisor=32),
# To visualize images and ground truths, set visualize=True below.
dict(
type='CustomFormatBundle',
keys=['gt_kernels', 'gt_mask'],
visualize=dict(flag=False, boundary_key='gt_kernels')),
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=4,
workers_per_gpu=4,
train=dict(
type=dataset_type,
ann_file=data_root + '/instances_training.json',
img_prefix=data_root + '/imgs',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + '/instances_val.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + '/instances_val.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline))
evaluation = dict(interval=10, metric='hmean-iou')

View File

@ -0,0 +1,29 @@
# PSENet
## Introduction
[ALGORITHM]
```bibtex
@article{li2018shape,
title={Shape robust text detection with progressive scale expansion network},
author={Li, Xiang and Wang, Wenhai and Hou, Wenbo and Liu, Ruo-Ze and Lu, Tong and Yang, Jian},
journal={arXiv preprint arXiv:1806.02559},
year={2018}
}
```
## Results and models
### CTW1500
| Method | Backbone | Extra Data | Training set | Test set | #epochs | Test size | Recall | Precision | Hmean | Download |
| :------------------------------------------------------------------: | :------: | :--------: | :-----------: | :----------: | :-----: | :-------: | :----: | :-------: | :---: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| [PSENet-4s](/configs/textdet/psenet/psenet_r50_fpnf_600e_ctw1500.py) | ResNet50 | - | CTW1500 Train | CTW1500 Test | 600 | 1280 | 0.728 | 0.849 | 0.784 | [model](https://download.openmmlab.com/mmocr/textdet/psenet/psenet_r50_fpnf_600e_ctw1500_20210401-216fed50.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/psenet/20210401_215421.log.json) |
### ICDAR2015
| Method | Backbone | Extra Data | Training set | Test set | #epochs | Test size | Recall | Precision | Hmean | Download |
| :--------------------------------------------------------------------: | :------: | :---------------------------------------------------------------------------------------------------------------------------------------: | :----------: | :-------: | :-----: | :-------: | :----: | :-------: | :---: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| [PSENet-4s](/configs/textdet/psenet/psenet_r50_fpnf_600e_icdar2015.py) | ResNet50 | - | IC15 Train | IC15 Test | 600 | 2240 | 0.784 | 0.831 | 0.807 | [model](https://download.openmmlab.com/mmocr/textdet/psenet/psenet_r50_fpnf_600e_icdar2015-c6131f0d.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/psenet/20210331_214145.log.json) |
| [PSENet-4s](/configs/textdet/psenet/psenet_r50_fpnf_600e_icdar2015.py) | ResNet50 | pretrained on IC17 MLT ([model](https://download.openmmlab.com/mmocr/textdet/psenet/psenet_r50_fpnf_600e_icdar2017_as_pretrain-3bd6056c.pth)) | IC15 Train | IC15 Test | 600 | 2240 | 0.834 | 0.861 | 0.847 | [model](https://download.openmmlab.com/mmocr/textdet/psenet/psenet_r50_fpnf_600e_icdar2015_pretrain-eefd8fe6.pth) \| [log]() |

View File

@ -0,0 +1,108 @@
_base_ = ['../../_base_/default_runtime.py']
# optimizer
optimizer = dict(type='Adam', lr=1e-4)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='step', step=[200, 400])
total_epochs = 600
model = dict(
type='PSENet',
pretrained='torchvision://resnet50',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='SyncBN', requires_grad=True),
norm_eval=True,
style='caffe'),
neck=dict(
type='FPNF',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
fusion_type='concat'),
bbox_head=dict(
type='PSEHead',
text_repr_type='poly',
in_channels=[256],
out_channels=7,
loss=dict(type='PSELoss')),
train_cfg=None,
test_cfg=None)
dataset_type = 'IcdarDataset'
data_root = 'data/ctw1500/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='ScaleAspectJitter',
img_scale=[(3000, 736)],
ratio_range=(0.5, 3),
aspect_ratio_range=(1, 1),
multiscale_mode='value',
long_size_bound=1280,
short_size_bound=640,
resize_type='long_short_bound',
keep_ratio=False),
dict(type='PSENetTargets'),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='RandomRotateTextDet'),
dict(
type='RandomCropInstances',
target_size=(640, 640),
instance_key='gt_kernels'),
dict(type='Pad', size_divisor=32),
dict(
type='CustomFormatBundle',
keys=['gt_kernels', 'gt_mask'],
visualize=dict(flag=False, boundary_key='gt_kernels')),
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1280, 1280),
flip=False,
transforms=[
dict(type='Resize', img_scale=(1280, 1280), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + '/instances_training.json',
img_prefix=data_root + '/imgs',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline))
evaluation = dict(interval=10, metric='hmean-iou')

View File

@ -0,0 +1,108 @@
_base_ = ['../../_base_/runtime_10e.py']
# optimizer
optimizer = dict(type='Adam', lr=1e-4)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='step', step=[200, 400])
total_epochs = 600
model = dict(
type='PSENet',
pretrained='torchvision://resnet50',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='SyncBN', requires_grad=True),
norm_eval=True,
style='caffe'),
neck=dict(
type='FPNF',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
fusion_type='concat'),
bbox_head=dict(
type='PSEHead',
text_repr_type='quad',
in_channels=[256],
out_channels=7,
loss=dict(type='PSELoss')),
train_cfg=None,
test_cfg=None)
dataset_type = 'IcdarDataset'
data_root = 'data/icdar2015/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='ScaleAspectJitter',
img_scale=[(3000, 736)], # unused
ratio_range=(0.5, 3),
aspect_ratio_range=(1, 1),
multiscale_mode='value',
long_size_bound=1280,
short_size_bound=640,
resize_type='long_short_bound',
keep_ratio=False),
dict(type='PSENetTargets'),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='RandomRotateTextDet'),
dict(
type='RandomCropInstances',
target_size=(640, 640),
instance_key='gt_kernels'),
dict(type='Pad', size_divisor=32),
dict(
type='CustomFormatBundle',
keys=['gt_kernels', 'gt_mask'],
visualize=dict(flag=False, boundary_key='gt_kernels')),
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(2240, 2200),
flip=False,
transforms=[
dict(type='Resize', img_scale=(2240, 2200), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=8,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + '/instances_training.json',
img_prefix=data_root + '/imgs',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline))
evaluation = dict(interval=10, metric='hmean-iou')

View File

@ -0,0 +1,103 @@
_base_ = [
'../../_base_/schedules/schedule_sgd_600e.py',
'../../_base_/runtime_10e.py'
]
model = dict(
type='PSENet',
pretrained='torchvision://resnet50',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='caffe'),
neck=dict(
type='FPNF',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
fusion_type='concat'),
bbox_head=dict(
type='PSEHead',
text_repr_type='quad',
in_channels=[256],
out_channels=7,
loss=dict(type='PSELoss')),
train_cfg=None,
test_cfg=None)
dataset_type = 'IcdarDataset'
data_root = 'data/icdar2017/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='ScaleAspectJitter',
img_scale=[(3000, 736)],
ratio_range=(0.5, 3),
aspect_ratio_range=(1, 1),
multiscale_mode='value',
long_size_bound=1280,
short_size_bound=640,
resize_type='long_short_bound',
keep_ratio=False),
dict(type='PSENetTargets'),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='RandomRotateTextDet'),
dict(
type='RandomCropInstances',
target_size=(640, 640),
instance_key='gt_kernels'),
dict(type='Pad', size_divisor=32),
dict(
type='CustomFormatBundle',
keys=['gt_kernels', 'gt_mask'],
visualize=dict(flag=False, boundary_key='gt_kernels')),
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(2240, 2200),
flip=False,
transforms=[
dict(type='Resize', img_scale=(2240, 2200), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
train=dict(
type=dataset_type,
ann_file=data_root + '/instances_training.json',
img_prefix=data_root + '/imgs',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + '/instances_val.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + '/instances_val.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline))
evaluation = dict(interval=10, metric='hmean-iou')

View File

@ -0,0 +1,23 @@
# TextSnake
## Introduction
[ALGORITHM]
```bibtex
@inproceedings{long2018textsnake,
title={TextSnake: A Flexible Representation for Detecting Text of Arbitrary Shapes},
author={Long, Shangbang and Ruan, Jiaqiang and Zhang, Wenjie and He, Xin and Wu, Wenhao and Yao, Cong},
booktitle={ECCV},
pages={20-36},
year={2018}
}
```
## Results and models
### CTW1500
| Method | Pretrained Model | Training set | Test set | #epochs | Test size | Recall | Precision | Hmean | Download |
| :----------------------------------------------------------------------------: | :--------------: | :-----------: | :----------: | :-----: | :-------: | :----: | :-------: | :---: | :--------------------------------------------------------------------------------------------------------------------------: |
| [TextSnake](/configs/textdet/textsnake/textsnake_r50_fpn_unet_600e_ctw1500.py) | ImageNet | CTW1500 Train | CTW1500 Test | 1200 | 736 | 0.795 | 0.840 | 0.817 | [model](https://download.openmmlab.com/mmocr/textdet/textsnake/textsnake_r50_fpn_unet_1200e_ctw1500-27f65b64.pth) \| [log]() |

View File

@ -0,0 +1,113 @@
_base_ = [
'../../_base_/schedules/schedule_1200e.py',
'../../_base_/default_runtime.py'
]
model = dict(
type='TextSnake',
pretrained='torchvision://resnet50',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='caffe'),
neck=dict(
type='FPN_UNET', in_channels=[256, 512, 1024, 2048], out_channels=32),
bbox_head=dict(
type='TextSnakeHead',
in_channels=32,
text_repr_type='poly',
loss=dict(type='TextSnakeLoss')),
train_cfg=None,
test_cfg=None)
dataset_type = 'IcdarDataset'
data_root = 'data/ctw1500/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='RandomCropPolyInstances',
instance_key='gt_masks',
crop_ratio=0.65,
min_side_ratio=0.3),
dict(
type='RandomRotatePolyInstances',
rotate_ratio=0.5,
max_angle=20,
pad_with_fixed_color=False),
dict(
type='ScaleAspectJitter',
img_scale=[(3000, 736)], # unused
ratio_range=(0.7, 1.3),
aspect_ratio_range=(0.9, 1.1),
multiscale_mode='value',
long_size_bound=800,
short_size_bound=480,
resize_type='long_short_bound',
keep_ratio=False),
dict(type='SquareResizePad', target_size=800, pad_ratio=0.6),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='TextSnakeTargets'),
dict(type='Pad', size_divisor=32),
dict(
type='CustomFormatBundle',
keys=[
'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
'gt_radius_map', 'gt_sin_map', 'gt_cos_map'
],
visualize=dict(flag=False, boundary_key='gt_text_mask')),
dict(
type='Collect',
keys=[
'img', 'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
'gt_radius_map', 'gt_sin_map', 'gt_cos_map'
])
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 736),
flip=False,
transforms=[
dict(type='Resize', img_scale=(1333, 736), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=4,
workers_per_gpu=4,
train=dict(
type=dataset_type,
ann_file=data_root + '/instances_training.json',
img_prefix=data_root + '/imgs',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline))
evaluation = dict(interval=10, metric='hmean-iou')

View File

@ -0,0 +1,37 @@
# An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition
## Introduction
[ALGORITHM]
```bibtex
@article{shi2016end,
title={An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition},
author={Shi, Baoguang and Bai, Xiang and Yao, Cong},
journal={IEEE transactions on pattern analysis and machine intelligence},
year={2016}
}
```
## Dataset
### Train Dataset
| trainset | instance_num | repeat_num | note |
| :------: | :----------: | :--------: | :---: |
| Syn90k | 8919273 | 1 | synth |
### Test Dataset
| testset | instance_num | note |
| :-----: | :----------: | :-----: |
| IIIT5K | 3000 | regular |
| SVT | 647 | regular |
| IC13 | 1015 | regular |
## Results and models
| Methods | IIIT5K | SVT | IC13 | IC15 | SVTP | CT80 | download |
| :-----: | :----: | :-: | :--: | :--: | :--: | :--: | :------: |
| CRNN | 80.5 | 81.5 | 86.5 | - | - | - | [model](https://download.openmmlab.com/mmocr/textrecog/crnn/crnn_academic-a723a1c5.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/crnn/20210326_111035.log.json) |
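As a usage illustration (not part of this diff), recognition inference with this checkpoint roughly follows the project's demo scripts; the import locations, config filename, demo image path, and result keys below are assumptions based on those scripts:
```python
# Hedged sketch of single-image recognition inference.
from mmdet.apis import init_detector
from mmocr.apis import model_inference  # assumed export at this version

cfg_path = 'configs/textrecog/crnn/crnn_academic_dataset.py'  # assumed filename
ckpt = 'https://download.openmmlab.com/mmocr/textrecog/crnn/crnn_academic-a723a1c5.pth'
model = init_detector(cfg_path, ckpt, device='cpu')
result = model_inference(model, 'demo/demo_text_recog.jpg')  # assumed demo image
print(result)  # expected shape: {'text': ..., 'score': ...}
```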

View File

@ -0,0 +1,138 @@
_base_ = []
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook')
])
# yapf:enable
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
# model
label_convertor = dict(
type='CTCConvertor', dict_type='DICT36', with_unknown=False, lower=True)
model = dict(
type='CRNNNet',
preprocessor=None,
backbone=dict(type='VeryDeepVgg', leakyRelu=False, input_channels=1),
encoder=None,
decoder=dict(type='CRNNDecoder', in_channels=512, rnn_flag=True),
loss=dict(type='CTCLoss'),
label_convertor=label_convertor,
pretrained=None)
train_cfg = None
test_cfg = None
# optimizer
optimizer = dict(type='Adadelta', lr=1.0)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='step', step=[])
total_epochs = 5
# data
img_norm_cfg = dict(mean=[0.5], std=[0.5])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeOCR',
height=32,
min_width=100,
max_width=100,
keep_aspect_ratio=False),
dict(type='ToTensorOCR'),
dict(type='NormalizeOCR', **img_norm_cfg),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'filename', 'ori_shape', 'img_shape', 'text', 'valid_ratio'
]),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeOCR',
height=32,
min_width=4,
max_width=None,
keep_aspect_ratio=True),
dict(type='ToTensorOCR'),
dict(type='NormalizeOCR', **img_norm_cfg),
dict(
type='Collect',
keys=['img'],
meta_keys=['filename', 'ori_shape', 'img_shape', 'valid_ratio']),
]
dataset_type = 'OCRDataset'
train_img_prefix = 'data/mixture/Syn90k/mnt/ramdisk/max/90kDICT32px'
train_ann_file = 'data/mixture/Syn90k/label.lmdb'
train1 = dict(
type=dataset_type,
img_prefix=train_img_prefix,
ann_file=train_ann_file,
loader=dict(
type='LmdbLoader',
repeat=1,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=train_pipeline,
test_mode=False)
test_prefix = 'data/mixture/'
test_img_prefix1 = test_prefix + 'icdar_2013/'
test_img_prefix2 = test_prefix + 'IIIT5K/'
test_img_prefix3 = test_prefix + 'svt/'
test_ann_file1 = test_prefix + 'icdar_2013/test_label_1015.txt'
test_ann_file2 = test_prefix + 'IIIT5K/test_label.txt'
test_ann_file3 = test_prefix + 'svt/test_label.txt'
test1 = dict(
type=dataset_type,
img_prefix=test_img_prefix1,
ann_file=test_ann_file1,
loader=dict(
type='HardDiskLoader',
repeat=1,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=test_pipeline,
test_mode=True)
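# test2/test3 reuse test1's settings via shallow dict copies, overriding only
# the image prefix and annotation file.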
test2 = {key: value for key, value in test1.items()}
test2['img_prefix'] = test_img_prefix2
test2['ann_file'] = test_ann_file2
test3 = {key: value for key, value in test1.items()}
test3['img_prefix'] = test_img_prefix3
test3['ann_file'] = test_ann_file3
data = dict(
samples_per_gpu=64,
workers_per_gpu=4,
train=dict(type='ConcatDataset', datasets=[train1]),
val=dict(type='ConcatDataset', datasets=[test1, test2, test3]),
test=dict(type='ConcatDataset', datasets=[test1, test2, test3]))
evaluation = dict(interval=1, metric='acc')
cudnn_benchmark = True

View File

@ -0,0 +1,6 @@
_base_ = [
'../../_base_/schedules/schedule_adadelta_8e.py',
'../../_base_/default_runtime.py',
'../../_base_/recog_datasets/toy_dataset.py',
'../../_base_/recog_models/crnn.py'
]

View File

@ -0,0 +1,61 @@
# NRTR
## Introduction
[ALGORITHM]
```bibtex
@inproceedings{sheng2019nrtr,
title={NRTR: A no-recurrence sequence-to-sequence model for scene text recognition},
author={Sheng, Fenfen and Chen, Zhineng and Xu, Bo},
booktitle={2019 International Conference on Document Analysis and Recognition (ICDAR)},
pages={781--786},
year={2019},
organization={IEEE}
}
```
[BACKBONE]
```bibtex
@inproceedings{li2019show,
title={Show, attend and read: A simple and strong baseline for irregular text recognition},
author={Li, Hui and Wang, Peng and Shen, Chunhua and Zhang, Guyu},
booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
volume={33},
number={01},
pages={8610--8617},
year={2019}
}
```
## Dataset
### Train Dataset
| trainset | instance_num | repeat_num | source |
| :--------: | :----------: | :--------: | :----------------------: |
| SynthText | 7266686 | 1 | synth |
| Syn90k | 8919273 | 1 | synth |
### Test Dataset
| testset | instance_num | type |
| :-----: | :----------: | :-------------------------: |
| IIIT5K | 3000 | regular |
| SVT | 647 | regular |
| IC13 | 1015 | regular |
| IC15 | 2077 | irregular |
| SVTP | 645 | irregular |
| CT80 | 288 | irregular |
## Results and Models
| Methods | Backbone | IIIT5K | SVT | IC13 | IC15 | SVTP | CT80 | download |
| :-----: | :------: | :----: | :-: | :--: | :--: | :--: | :--: | :------: |
| [NRTR](/configs/textrecog/nrtr/nrtr_r31_academic.py) | R31-1/16-1/8 | 93.9 | 90.0 | 93.5 | 74.5 | 78.5 | 86.5 | [model](https://download.openmmlab.com/mmocr/textrecog/nrtr/nrtr_r31_academic_20210406-954db95e.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/nrtr/20210406_010150.log.json) |
**Notes:**
- `R31-1/16-1/8` means the backbone's output feature map is 1/16 of the input height and 1/8 of the input width; see the sketch below.
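To make the notation concrete, here is a quick, purely illustrative check assuming exact integer downsampling of a 32x100 input crop (32 is the input height used by the config; 100 is an arbitrary width):
```python
# Feature-map size implied by R31-1/16-1/8 for a 32x100 input (illustrative).
in_h, in_w = 32, 100
feat_h, feat_w = in_h // 16, in_w // 8
print((feat_h, feat_w))  # -> (2, 12)
```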

View File

@ -0,0 +1,112 @@
_base_ = [
'../../_base_/default_runtime.py',
'../../_base_/recog_models/nrtr.py',
]
# optimizer
optimizer = dict(type='Adam', lr=1e-3)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='step', step=[3, 4])
total_epochs = 6
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
dict(
type='ResizeOCR',
height=32,
min_width=32,
max_width=100,
keep_aspect_ratio=False),
dict(type='ToTensorOCR'),
dict(type='NormalizeOCR', **img_norm_cfg),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'filename', 'ori_shape', 'img_shape', 'text', 'valid_ratio'
]),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiRotateAugOCR',
rotate_degrees=[0, 90, 270],
transforms=[
dict(
type='ResizeOCR',
height=32,
min_width=32,
max_width=100,
keep_aspect_ratio=False),
dict(type='ToTensorOCR'),
dict(type='NormalizeOCR', **img_norm_cfg),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'filename', 'ori_shape', 'img_shape', 'valid_ratio'
]),
])
]
dataset_type = 'OCRDataset'
img_prefix = 'tests/data/ocr_toy_dataset/imgs'
train_anno_file1 = 'tests/data/ocr_toy_dataset/label.txt'
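# Each line of label.txt is '<filename> <text>'; LineStrParser below splits on the space separator.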
train1 = dict(
type=dataset_type,
img_prefix=img_prefix,
ann_file=train_anno_file1,
loader=dict(
type='HardDiskLoader',
repeat=100,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=train_pipeline,
test_mode=False)
train_anno_file2 = 'tests/data/ocr_toy_dataset/label.lmdb'
train2 = dict(
type=dataset_type,
img_prefix=img_prefix,
ann_file=train_anno_file2,
loader=dict(
type='LmdbLoader',
repeat=100,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=train_pipeline,
test_mode=False)
test_anno_file1 = 'tests/data/ocr_toy_dataset/label.lmdb'
test = dict(
type=dataset_type,
img_prefix=img_prefix,
ann_file=test_anno_file1,
loader=dict(
type='LmdbLoader',
repeat=1,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=test_pipeline,
test_mode=True)
data = dict(
samples_per_gpu=16,
workers_per_gpu=2,
train=dict(type='ConcatDataset', datasets=[train1, train2]),
val=dict(type='ConcatDataset', datasets=[test]),
test=dict(type='ConcatDataset', datasets=[test]))
evaluation = dict(interval=1, metric='acc')

View File

@ -0,0 +1,163 @@
_base_ = [
'../../_base_/default_runtime.py', '../../_base_/recog_models/nrtr.py'
]
label_convertor = dict(
type='AttnConvertor', dict_type='DICT90', with_unknown=True)
model = dict(
type='NRTR',
backbone=dict(
type='ResNet31OCR',
layers=[1, 2, 5, 3],
channels=[32, 64, 128, 256, 512, 512],
stage4_pool_cfg=dict(kernel_size=(2, 1), stride=(2, 1)),
last_stage_pool=True),
encoder=dict(type='TFEncoder'),
decoder=dict(type='TFDecoder'),
loss=dict(type='TFLoss'),
label_convertor=label_convertor,
max_seq_len=40)
# optimizer
optimizer = dict(type='Adam', lr=1e-3)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='step', step=[3, 4])
total_epochs = 6
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeOCR',
height=32,
min_width=32,
max_width=160,
keep_aspect_ratio=True,
width_downsample_ratio=0.25),
dict(type='ToTensorOCR'),
dict(type='NormalizeOCR', **img_norm_cfg),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'filename', 'ori_shape', 'img_shape', 'text', 'valid_ratio'
]),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiRotateAugOCR',
rotate_degrees=[0, 90, 270],
transforms=[
dict(
type='ResizeOCR',
height=32,
min_width=32,
max_width=160,
keep_aspect_ratio=True,
width_downsample_ratio=0.25),
dict(type='ToTensorOCR'),
dict(type='NormalizeOCR', **img_norm_cfg),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'filename', 'ori_shape', 'img_shape', 'valid_ratio'
]),
])
]
dataset_type = 'OCRDataset'
train_prefix = 'data/mixture/'
train_img_prefix1 = train_prefix + \
'SynthText/synthtext/SynthText_patch_horizontal'
train_img_prefix2 = train_prefix + 'Syn90k/mnt/ramdisk/max/90kDICT32px'
train_ann_file1 = train_prefix + 'SynthText/label.lmdb'
train_ann_file2 = train_prefix + 'Syn90k/label.lmdb'
train1 = dict(
type=dataset_type,
img_prefix=train_img_prefix1,
ann_file=train_ann_file1,
loader=dict(
type='LmdbLoader',
repeat=1,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=train_pipeline,
test_mode=False)
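# Shallow-copy the train1 config; only the image prefix and annotation file differ below.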
train2 = {key: value for key, value in train1.items()}
train2['img_prefix'] = train_img_prefix2
train2['ann_file'] = train_ann_file2
test_prefix = 'data/mixture/'
test_img_prefix1 = test_prefix + 'IIIT5K/'
test_img_prefix2 = test_prefix + 'svt/'
test_img_prefix3 = test_prefix + 'icdar_2013/'
test_img_prefix4 = test_prefix + 'icdar_2015/'
test_img_prefix5 = test_prefix + 'svtp/'
test_img_prefix6 = test_prefix + 'ct80/'
test_ann_file1 = test_prefix + 'IIIT5K/test_label.txt'
test_ann_file2 = test_prefix + 'svt/test_label.txt'
test_ann_file3 = test_prefix + 'icdar_2013/test_label_1015.txt'
test_ann_file4 = test_prefix + 'icdar_2015/test_label.txt'
test_ann_file5 = test_prefix + 'svtp/test_label.txt'
test_ann_file6 = test_prefix + 'ct80/test_label.txt'
test1 = dict(
type=dataset_type,
img_prefix=test_img_prefix1,
ann_file=test_ann_file1,
loader=dict(
type='HardDiskLoader',
repeat=1,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=test_pipeline,
test_mode=True)
test2 = {key: value for key, value in test1.items()}
test2['img_prefix'] = test_img_prefix2
test2['ann_file'] = test_ann_file2
test3 = {key: value for key, value in test1.items()}
test3['img_prefix'] = test_img_prefix3
test3['ann_file'] = test_ann_file3
test4 = {key: value for key, value in test1.items()}
test4['img_prefix'] = test_img_prefix4
test4['ann_file'] = test_ann_file4
test5 = {key: value for key, value in test1.items()}
test5['img_prefix'] = test_img_prefix5
test5['ann_file'] = test_ann_file5
test6 = {key: value for key, value in test1.items()}
test6['img_prefix'] = test_img_prefix6
test6['ann_file'] = test_ann_file6
data = dict(
samples_per_gpu=128,
workers_per_gpu=4,
train=dict(type='ConcatDataset', datasets=[train1, train2]),
val=dict(
type='ConcatDataset',
datasets=[test1, test2, test3, test4, test5, test6]),
test=dict(
type='ConcatDataset',
datasets=[test1, test2, test3, test4, test5, test6]))
evaluation = dict(interval=1, metric='acc')

View File

@ -0,0 +1,51 @@
# RobustScanner: Dynamically Enhancing Positional Clues for Robust Text Recognition
## Introduction
[ALGORITHM]
```bibtex
@inproceedings{yue2020robustscanner,
title={RobustScanner: Dynamically Enhancing Positional Clues for Robust Text Recognition},
author={Yue, Xiaoyu and Kuang, Zhanghui and Lin, Chenhao and Sun, Hongbin and Zhang, Wayne},
booktitle={European Conference on Computer Vision},
year={2020}
}
```
## Dataset
### Train Dataset
| trainset | instance_num | repeat_num | source |
| :--------: | :----------: | :--------: | :----------------------: |
| icdar_2011 | 3567 | 20 | real |
| icdar_2013 | 848 | 20 | real |
| icdar2015 | 4468 | 20 | real |
| coco_text | 42142 | 20 | real |
| IIIT5K | 2000 | 20 | real |
| SynthText | 2400000 | 1 | synth |
| SynthAdd | 1216889 | 1 | synth, 1.6m in [[1]](#1) |
| Syn90k | 2400000 | 1 | synth |
### Test Dataset
| testset | instance_num | type |
| :-----: | :----------: | :-------------------------: |
| IIIT5K | 3000 | regular |
| SVT | 647 | regular |
| IC13 | 1015 | regular |
| IC15 | 2077 | irregular |
| SVTP | 645 | irregular, 639 in [[1]](#1) |
| CT80 | 288 | irregular |
## Results and Models
(IIIT5K, SVT and IC13 are regular-text benchmarks; IC15, SVTP and CT80 are irregular.)

| Methods | GPUs | IIIT5K | SVT | IC13 | IC15 | SVTP | CT80 | download |
| :-----: | :--: | :----: | :-: | :--: | :--: | :--: | :--: | :------: |
| [RobustScanner](configs/textrecog/robust_scanner/robustscanner_r31_academic.py) | 16 | 95.1 | 89.2 | 93.1 | 77.8 | 80.3 | 90.3 | [model](https://download.openmmlab.com/mmocr/textrecog/robustscanner/robustscanner_r31_academic-5f05874f.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/robustscanner/20210401_170932.log.json) |
## References
<a id="1">[1]</a> Li, Hui and Wang, Peng and Shen, Chunhua and Zhang, Guyu. Show, attend and read: A simple and strong baseline for irregular text recognition. In AAAI 2019.

View File

@ -0,0 +1,12 @@
_base_ = [
'../../_base_/default_runtime.py',
'../../_base_/recog_models/robust_scanner.py',
'../../_base_/recog_datasets/toy_dataset.py'
]
# optimizer
optimizer = dict(type='Adam', lr=1e-3)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='step', step=[3, 4])
total_epochs = 6

View File

@ -0,0 +1,197 @@
_base_ = [
'../../_base_/default_runtime.py',
'../../_base_/recog_models/robust_scanner.py'
]
# optimizer
optimizer = dict(type='Adam', lr=1e-3)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='step', step=[3, 4])
total_epochs = 5
img_norm_cfg = dict(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeOCR',
height=48,
min_width=48,
max_width=160,
keep_aspect_ratio=True,
width_downsample_ratio=0.25),
dict(type='ToTensorOCR'),
dict(type='NormalizeOCR', **img_norm_cfg),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'filename', 'ori_shape', 'img_shape', 'text', 'valid_ratio'
]),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiRotateAugOCR',
rotate_degrees=[0, 90, 270],
transforms=[
dict(
type='ResizeOCR',
height=48,
min_width=48,
max_width=160,
keep_aspect_ratio=True,
width_downsample_ratio=0.25),
dict(type='ToTensorOCR'),
dict(type='NormalizeOCR', **img_norm_cfg),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'filename', 'ori_shape', 'img_shape', 'valid_ratio'
]),
])
]
dataset_type = 'OCRDataset'
train_prefix = 'data/mixture/'
train_img_prefix1 = train_prefix + 'icdar_2011'
train_img_prefix2 = train_prefix + 'icdar_2013'
train_img_prefix3 = train_prefix + 'icdar_2015'
train_img_prefix4 = train_prefix + 'coco_text'
train_img_prefix5 = train_prefix + 'III5K'
train_img_prefix6 = train_prefix + 'SynthText_Add'
train_img_prefix7 = train_prefix + 'SynthText'
train_img_prefix8 = train_prefix + 'Syn90k'
train_ann_file1 = train_prefix + 'icdar_2011/train_label.txt'
train_ann_file2 = train_prefix + 'icdar_2013/train_label.txt'
train_ann_file3 = train_prefix + 'icdar_2015/train_label.txt'
train_ann_file4 = train_prefix + 'coco_text/train_label.txt'
train_ann_file5 = train_prefix + 'III5K/train_label.txt'
train_ann_file6 = train_prefix + 'SynthText_Add/label.txt'
train_ann_file7 = train_prefix + 'SynthText/shuffle_labels.txt'
train_ann_file8 = train_prefix + 'Syn90k/shuffle_labels.txt'
train1 = dict(
type=dataset_type,
img_prefix=train_img_prefix1,
ann_file=train_ann_file1,
loader=dict(
type='HardDiskLoader',
repeat=20,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=train_pipeline,
test_mode=False)
train2 = {key: value for key, value in train1.items()}
train2['img_prefix'] = train_img_prefix2
train2['ann_file'] = train_ann_file2
train3 = {key: value for key, value in train1.items()}
train3['img_prefix'] = train_img_prefix3
train3['ann_file'] = train_ann_file3
train4 = {key: value for key, value in train1.items()}
train4['img_prefix'] = train_img_prefix4
train4['ann_file'] = train_ann_file4
train5 = {key: value for key, value in train1.items()}
train5['img_prefix'] = train_img_prefix5
train5['ann_file'] = train_ann_file5
train6 = dict(
type=dataset_type,
img_prefix=train_img_prefix6,
ann_file=train_ann_file6,
loader=dict(
type='HardDiskLoader',
repeat=1,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=train_pipeline,
test_mode=False)
train7 = {key: value for key, value in train6.items()}
train7['img_prefix'] = train_img_prefix7
train7['ann_file'] = train_ann_file7
train8 = {key: value for key, value in train6.items()}
train8['img_prefix'] = train_img_prefix8
train8['ann_file'] = train_ann_file8
test_prefix = 'data/mixture/'
test_img_prefix1 = test_prefix + 'IIIT5K/'
test_img_prefix2 = test_prefix + 'svt/'
test_img_prefix3 = test_prefix + 'icdar_2013/'
test_img_prefix4 = test_prefix + 'icdar_2015/'
test_img_prefix5 = test_prefix + 'svtp/'
test_img_prefix6 = test_prefix + 'ct80/'
test_ann_file1 = test_prefix + 'IIIT5K/test_label.txt'
test_ann_file2 = test_prefix + 'svt/test_label.txt'
test_ann_file3 = test_prefix + 'icdar_2013/test_label_1015.txt'
test_ann_file4 = test_prefix + 'icdar_2015/test_label.txt'
test_ann_file5 = test_prefix + 'svtp/test_label.txt'
test_ann_file6 = test_prefix + 'ct80/test_label.txt'
test1 = dict(
type=dataset_type,
img_prefix=test_img_prefix1,
ann_file=test_ann_file1,
loader=dict(
type='HardDiskLoader',
repeat=1,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=test_pipeline,
test_mode=True)
test2 = {key: value for key, value in test1.items()}
test2['img_prefix'] = test_img_prefix2
test2['ann_file'] = test_ann_file2
test3 = {key: value for key, value in test1.items()}
test3['img_prefix'] = test_img_prefix3
test3['ann_file'] = test_ann_file3
test4 = {key: value for key, value in test1.items()}
test4['img_prefix'] = test_img_prefix4
test4['ann_file'] = test_ann_file4
test5 = {key: value for key, value in test1.items()}
test5['img_prefix'] = test_img_prefix5
test5['ann_file'] = test_ann_file5
test6 = {key: value for key, value in test1.items()}
test6['img_prefix'] = test_img_prefix6
test6['ann_file'] = test_ann_file6
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
train=dict(
type='ConcatDataset',
datasets=[
train1, train2, train3, train4, train5, train6, train7, train8
]),
val=dict(
type='ConcatDataset',
datasets=[test1, test2, test3, test4, test5, test6]),
test=dict(
type='ConcatDataset',
datasets=[test1, test2, test3, test4, test5, test6]))
evaluation = dict(interval=1, metric='acc')

View File

@ -0,0 +1,67 @@
# Show, Attend and Read: A Simple and Strong Baseline for Irregular Text Recognition
## Introduction
[ALGORITHM]
```bibtex
@inproceedings{li2019show,
title={Show, attend and read: A simple and strong baseline for irregular text recognition},
author={Li, Hui and Wang, Peng and Shen, Chunhua and Zhang, Guyu},
booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
volume={33},
number={01},
pages={8610--8617},
year={2019}
}
```
## Dataset
### Train Dataset
| trainset | instance_num | repeat_num | source |
| :--------: | :----------: | :--------: | :----------------------: |
| icdar_2011 | 3567 | 20 | real |
| icdar_2013 | 848 | 20 | real |
| icdar2015 | 4468 | 20 | real |
| coco_text | 42142 | 20 | real |
| IIIT5K | 2000 | 20 | real |
| SynthText | 2400000 | 1 | synth |
| SynthAdd | 1216889 | 1 | synth, 1.6m in [[1]](#1) |
| Syn90k | 2400000 | 1 | synth |
### Test Dataset
| testset | instance_num | type |
| :-----: | :----------: | :-------------------------: |
| IIIT5K | 3000 | regular |
| SVT | 647 | regular |
| IC13 | 1015 | regular |
| IC15 | 2077 | irregular |
| SVTP | 645 | irregular, 639 in [[1]](#1) |
| CT80 | 288 | irregular |
## Results and Models
(IIIT5K, SVT and IC13 are regular-text benchmarks; IC15, SVTP and CT80 are irregular.)

| Methods | Backbone | Decoder | IIIT5K | SVT | IC13 | IC15 | SVTP | CT80 | download |
| :-----: | :------: | :-----: | :----: | :-: | :--: | :--: | :--: | :--: | :------: |
| [SAR](/configs/textrecog/sar/sar_r31_parallel_decoder_academic.py) | R31-1/8-1/4 | ParallelSARDecoder | 95.0 | 89.6 | 93.7 | 79.0 | 82.2 | 88.9 | [model](https://download.openmmlab.com/mmocr/textrecog/sar/sar_r31_parallel_decoder_academic-dba3a4a3.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/sar/20210327_154129.log.json) |
| [SAR](configs/textrecog/sar/sar_r31_sequential_decoder_academic.py) | R31-1/8-1/4 | SequentialSARDecoder | 95.2 | 88.7 | 92.4 | 78.2 | 81.9 | 89.6 | [model](https://download.openmmlab.com/mmocr/textrecog/sar/sar_r31_sequential_decoder_academic-d06c9a8e.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/sar/20210330_105728.log.json) |
**Notes:**
- `R31-1/8-1/4` means the height of the feature map from the backbone is 1/8 of the input image height, and the width is 1/4 of the input width.
- We did not use beam search during decoding.
- We implemented two kinds of decoders, namely `ParallelSARDecoder` and `SequentialSARDecoder`; a config sketch for switching between them follows these notes.
  - `ParallelSARDecoder`: decodes in parallel during training with an `LSTM` layer. It is faster.
  - `SequentialSARDecoder`: decodes sequentially during training with `LSTMCell`. It is easier to understand.
- For the train dataset:
  - We did not construct the distinct data groups (20 groups in [[1]](#1)) and train the model group by group, since that would make training overly complicated.
  - Instead, we randomly selected `2.4m` patches from `Syn90k`, `2.4m` from `SynthText` and `1.2m` from `SynthAdd`, and grouped all the data together. See [config](https://download.openmmlab.com/mmocr/textrecog/sar/sar_r31_academic.py) for details.
- We used 48 GPUs with `total_batch_size = 64 * 48` in the experiment above to speed up training, while keeping `initial lr = 1e-3` unchanged.
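As a rough illustration (all field names and values are copied from the SAR configs in this release), switching between the two decoders is a one-line change in the model config:

```python
model = dict(
    type='SARNet',
    backbone=dict(type='ResNet31OCR'),
    encoder=dict(type='SAREncoder', enc_bi_rnn=False, enc_do_rnn=0.1, enc_gru=False),
    decoder=dict(
        # change 'ParallelSARDecoder' to 'SequentialSARDecoder' here;
        # all other fields stay the same
        type='ParallelSARDecoder',
        enc_bi_rnn=False,
        dec_bi_rnn=False,
        dec_do_rnn=0,
        dec_gru=False,
        pred_dropout=0.1,
        d_k=512,
        pred_concat=True),
    loss=dict(type='SARLoss'),
    label_convertor=dict(type='AttnConvertor', dict_type='DICT90', with_unknown=True),
    max_seq_len=30)
```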
## References
<a id="1">[1]</a> Li, Hui and Wang, Peng and Shen, Chunhua and Zhang, Guyu. Show, attend and read: A simple and strong baseline for irregular text recognition. In AAAI 2019.

View File

@ -0,0 +1,219 @@
_base_ = ['../../_base_/default_runtime.py']
label_convertor = dict(
type='AttnConvertor', dict_type='DICT90', with_unknown=True)
model = dict(
type='SARNet',
backbone=dict(type='ResNet31OCR'),
encoder=dict(
type='SAREncoder',
enc_bi_rnn=False,
enc_do_rnn=0.1,
enc_gru=False,
),
decoder=dict(
type='ParallelSARDecoder',
enc_bi_rnn=False,
dec_bi_rnn=False,
dec_do_rnn=0,
dec_gru=False,
pred_dropout=0.1,
d_k=512,
pred_concat=True),
loss=dict(type='SARLoss'),
label_convertor=label_convertor,
max_seq_len=30)
# optimizer
optimizer = dict(type='Adam', lr=1e-3)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='step', step=[3, 4])
total_epochs = 5
img_norm_cfg = dict(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeOCR',
height=48,
min_width=48,
max_width=160,
keep_aspect_ratio=True,
width_downsample_ratio=0.25),
dict(type='ToTensorOCR'),
dict(type='NormalizeOCR', **img_norm_cfg),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'filename', 'ori_shape', 'img_shape', 'text', 'valid_ratio'
]),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiRotateAugOCR',
rotate_degrees=[0, 90, 270],
transforms=[
dict(
type='ResizeOCR',
height=48,
min_width=48,
max_width=160,
keep_aspect_ratio=True,
width_downsample_ratio=0.25),
dict(type='ToTensorOCR'),
dict(type='NormalizeOCR', **img_norm_cfg),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'filename', 'ori_shape', 'img_shape', 'valid_ratio'
]),
])
]
dataset_type = 'OCRDataset'
train_prefix = 'data/mixture/'
train_img_prefix1 = train_prefix + 'icdar_2011'
train_img_prefix2 = train_prefix + 'icdar_2013'
train_img_prefix3 = train_prefix + 'icdar_2015'
train_img_prefix4 = train_prefix + 'coco_text'
train_img_prefix5 = train_prefix + 'III5K'
train_img_prefix6 = train_prefix + 'SynthText_Add'
train_img_prefix7 = train_prefix + 'SynthText'
train_img_prefix8 = train_prefix + 'Syn90k'
train_ann_file1 = train_prefix + 'icdar_2011/train_label.txt'
train_ann_file2 = train_prefix + 'icdar_2013/train_label.txt'
train_ann_file3 = train_prefix + 'icdar_2015/train_label.txt'
train_ann_file4 = train_prefix + 'coco_text/train_label.txt'
train_ann_file5 = train_prefix + 'III5K/train_label.txt'
train_ann_file6 = train_prefix + 'SynthText_Add/label.txt'
train_ann_file7 = train_prefix + 'SynthText/shuffle_labels.txt'
train_ann_file8 = train_prefix + 'Syn90k/shuffle_labels.txt'
train1 = dict(
type=dataset_type,
img_prefix=train_img_prefix1,
ann_file=train_ann_file1,
loader=dict(
type='HardDiskLoader',
repeat=20,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=train_pipeline,
test_mode=False)
train2 = {key: value for key, value in train1.items()}
train2['img_prefix'] = train_img_prefix2
train2['ann_file'] = train_ann_file2
train3 = {key: value for key, value in train1.items()}
train3['img_prefix'] = train_img_prefix3
train3['ann_file'] = train_ann_file3
train4 = {key: value for key, value in train1.items()}
train4['img_prefix'] = train_img_prefix4
train4['ann_file'] = train_ann_file4
train5 = {key: value for key, value in train1.items()}
train5['img_prefix'] = train_img_prefix5
train5['ann_file'] = train_ann_file5
train6 = dict(
type=dataset_type,
img_prefix=train_img_prefix6,
ann_file=train_ann_file6,
loader=dict(
type='HardDiskLoader',
repeat=1,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=train_pipeline,
test_mode=False)
train7 = {key: value for key, value in train6.items()}
train7['img_prefix'] = train_img_prefix7
train7['ann_file'] = train_ann_file7
train8 = {key: value for key, value in train6.items()}
train8['img_prefix'] = train_img_prefix8
train8['ann_file'] = train_ann_file8
test_prefix = 'data/mixture/'
test_img_prefix1 = test_prefix + 'IIIT5K/'
test_img_prefix2 = test_prefix + 'svt/'
test_img_prefix3 = test_prefix + 'icdar_2013/'
test_img_prefix4 = test_prefix + 'icdar_2015/'
test_img_prefix5 = test_prefix + 'svtp/'
test_img_prefix6 = test_prefix + 'ct80/'
test_ann_file1 = test_prefix + 'IIIT5K/test_label.txt'
test_ann_file2 = test_prefix + 'svt/test_label.txt'
test_ann_file3 = test_prefix + 'icdar_2013/test_label_1015.txt'
test_ann_file4 = test_prefix + 'icdar_2015/test_label.txt'
test_ann_file5 = test_prefix + 'svtp/test_label.txt'
test_ann_file6 = test_prefix + 'ct80/test_label.txt'
test1 = dict(
type=dataset_type,
img_prefix=test_img_prefix1,
ann_file=test_ann_file1,
loader=dict(
type='HardDiskLoader',
repeat=1,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=test_pipeline,
test_mode=True)
test2 = {key: value for key, value in test1.items()}
test2['img_prefix'] = test_img_prefix2
test2['ann_file'] = test_ann_file2
test3 = {key: value for key, value in test1.items()}
test3['img_prefix'] = test_img_prefix3
test3['ann_file'] = test_ann_file3
test4 = {key: value for key, value in test1.items()}
test4['img_prefix'] = test_img_prefix4
test4['ann_file'] = test_ann_file4
test5 = {key: value for key, value in test1.items()}
test5['img_prefix'] = test_img_prefix5
test5['ann_file'] = test_ann_file5
test6 = {key: value for key, value in test1.items()}
test6['img_prefix'] = test_img_prefix6
test6['ann_file'] = test_ann_file6
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
train=dict(
type='ConcatDataset',
datasets=[
train1, train2, train3, train4, train5, train6, train7, train8
]),
val=dict(
type='ConcatDataset',
datasets=[test1, test2, test3, test4, test5, test6]),
test=dict(
type='ConcatDataset',
datasets=[test1, test2, test3, test4, test5, test6]))
evaluation = dict(interval=1, metric='acc')

View File

@ -0,0 +1,110 @@
_base_ = [
'../../_base_/default_runtime.py', '../../_base_/recog_models/sar.py'
]
# optimizer
optimizer = dict(type='Adam', lr=1e-3)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='step', step=[3, 4])
total_epochs = 5
img_norm_cfg = dict(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeOCR',
height=48,
min_width=48,
max_width=160,
keep_aspect_ratio=True),
dict(type='ToTensorOCR'),
dict(type='NormalizeOCR', **img_norm_cfg),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'filename', 'ori_shape', 'img_shape', 'text', 'valid_ratio'
]),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiRotateAugOCR',
rotate_degrees=[0, 90, 270],
transforms=[
dict(
type='ResizeOCR',
height=48,
min_width=48,
max_width=160,
keep_aspect_ratio=True),
dict(type='ToTensorOCR'),
dict(type='NormalizeOCR', **img_norm_cfg),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'filename', 'ori_shape', 'img_shape', 'valid_ratio'
]),
])
]
dataset_type = 'OCRDataset'
img_prefix = 'tests/data/ocr_toy_dataset/imgs'
train_anno_file1 = 'tests/data/ocr_toy_dataset/label.txt'
train1 = dict(
type=dataset_type,
img_prefix=img_prefix,
ann_file=train_anno_file1,
loader=dict(
type='HardDiskLoader',
repeat=100,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=train_pipeline,
test_mode=False)
train_anno_file2 = 'tests/data/ocr_toy_dataset/label.lmdb'
train2 = dict(
type=dataset_type,
img_prefix=img_prefix,
ann_file=train_anno_file2,
loader=dict(
type='LmdbLoader',
repeat=100,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=train_pipeline,
test_mode=False)
test_anno_file1 = 'tests/data/ocr_toy_dataset/label.lmdb'
test = dict(
type=dataset_type,
img_prefix=img_prefix,
ann_file=test_anno_file1,
loader=dict(
type='LmdbLoader',
repeat=1,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=test_pipeline,
test_mode=True)
data = dict(
samples_per_gpu=16,
workers_per_gpu=2,
train=dict(type='ConcatDataset', datasets=[train1, train2]),
val=dict(type='ConcatDataset', datasets=[test]),
test=dict(type='ConcatDataset', datasets=[test]))
evaluation = dict(interval=1, metric='acc')

View File

@ -0,0 +1,219 @@
_base_ = ['../../_base_/default_runtime.py']
label_convertor = dict(
type='AttnConvertor', dict_type='DICT90', with_unknown=True)
model = dict(
type='SARNet',
backbone=dict(type='ResNet31OCR'),
encoder=dict(
type='SAREncoder',
enc_bi_rnn=False,
enc_do_rnn=0.1,
enc_gru=False,
),
decoder=dict(
type='SequentialSARDecoder',
enc_bi_rnn=False,
dec_bi_rnn=False,
dec_do_rnn=0,
dec_gru=False,
pred_dropout=0.1,
d_k=512,
pred_concat=True),
loss=dict(type='SARLoss'),
label_convertor=label_convertor,
max_seq_len=30)
# optimizer
optimizer = dict(type='Adam', lr=1e-3)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='step', step=[3, 4])
total_epochs = 5
img_norm_cfg = dict(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeOCR',
height=48,
min_width=48,
max_width=160,
keep_aspect_ratio=True,
width_downsample_ratio=0.25),
dict(type='ToTensorOCR'),
dict(type='NormalizeOCR', **img_norm_cfg),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'filename', 'ori_shape', 'img_shape', 'text', 'valid_ratio'
]),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiRotateAugOCR',
rotate_degrees=[0, 90, 270],
transforms=[
dict(
type='ResizeOCR',
height=48,
min_width=48,
max_width=160,
keep_aspect_ratio=True,
width_downsample_ratio=0.25),
dict(type='ToTensorOCR'),
dict(type='NormalizeOCR', **img_norm_cfg),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'filename', 'ori_shape', 'img_shape', 'valid_ratio'
]),
])
]
dataset_type = 'OCRDataset'
train_prefix = 'data/mixture/'
train_img_prefix1 = train_prefix + 'icdar_2011'
train_img_prefix2 = train_prefix + 'icdar_2013'
train_img_prefix3 = train_prefix + 'icdar_2015'
train_img_prefix4 = train_prefix + 'coco_text'
train_img_prefix5 = train_prefix + 'III5K'
train_img_prefix6 = train_prefix + 'SynthText_Add'
train_img_prefix7 = train_prefix + 'SynthText'
train_img_prefix8 = train_prefix + 'Syn90k'
train_ann_file1 = train_prefix + 'icdar_2011/train_label.txt'
train_ann_file2 = train_prefix + 'icdar_2013/train_label.txt'
train_ann_file3 = train_prefix + 'icdar_2015/train_label.txt'
train_ann_file4 = train_prefix + 'coco_text/train_label.txt'
train_ann_file5 = train_prefix + 'III5K/train_label.txt'
train_ann_file6 = train_prefix + 'SynthText_Add/label.txt'
train_ann_file7 = train_prefix + 'SynthText/shuffle_labels.txt'
train_ann_file8 = train_prefix + 'Syn90k/shuffle_labels.txt'
train1 = dict(
type=dataset_type,
img_prefix=train_img_prefix1,
ann_file=train_ann_file1,
loader=dict(
type='HardDiskLoader',
repeat=20,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=train_pipeline,
test_mode=False)
train2 = {key: value for key, value in train1.items()}
train2['img_prefix'] = train_img_prefix2
train2['ann_file'] = train_ann_file2
train3 = {key: value for key, value in train1.items()}
train3['img_prefix'] = train_img_prefix3
train3['ann_file'] = train_ann_file3
train4 = {key: value for key, value in train1.items()}
train4['img_prefix'] = train_img_prefix4
train4['ann_file'] = train_ann_file4
train5 = {key: value for key, value in train1.items()}
train5['img_prefix'] = train_img_prefix5
train5['ann_file'] = train_ann_file5
train6 = dict(
type=dataset_type,
img_prefix=train_img_prefix6,
ann_file=train_ann_file6,
loader=dict(
type='HardDiskLoader',
repeat=1,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=train_pipeline,
test_mode=False)
train7 = {key: value for key, value in train6.items()}
train7['img_prefix'] = train_img_prefix7
train7['ann_file'] = train_ann_file7
train8 = {key: value for key, value in train6.items()}
train8['img_prefix'] = train_img_prefix8
train8['ann_file'] = train_ann_file8
test_prefix = 'data/mixture/'
test_img_prefix1 = test_prefix + 'IIIT5K/'
test_img_prefix2 = test_prefix + 'svt/'
test_img_prefix3 = test_prefix + 'icdar_2013/'
test_img_prefix4 = test_prefix + 'icdar_2015/'
test_img_prefix5 = test_prefix + 'svtp/'
test_img_prefix6 = test_prefix + 'ct80/'
test_ann_file1 = test_prefix + 'IIIT5K/test_label.txt'
test_ann_file2 = test_prefix + 'svt/test_label.txt'
test_ann_file3 = test_prefix + 'icdar_2013/test_label_1015.txt'
test_ann_file4 = test_prefix + 'icdar_2015/test_label.txt'
test_ann_file5 = test_prefix + 'svtp/test_label.txt'
test_ann_file6 = test_prefix + 'ct80/test_label.txt'
test1 = dict(
type=dataset_type,
img_prefix=test_img_prefix1,
ann_file=test_ann_file1,
loader=dict(
type='HardDiskLoader',
repeat=1,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=test_pipeline,
test_mode=True)
test2 = {key: value for key, value in test1.items()}
test2['img_prefix'] = test_img_prefix2
test2['ann_file'] = test_ann_file2
test3 = {key: value for key, value in test1.items()}
test3['img_prefix'] = test_img_prefix3
test3['ann_file'] = test_ann_file3
test4 = {key: value for key, value in test1.items()}
test4['img_prefix'] = test_img_prefix4
test4['ann_file'] = test_ann_file4
test5 = {key: value for key, value in test1.items()}
test5['img_prefix'] = test_img_prefix5
test5['ann_file'] = test_ann_file5
test6 = {key: value for key, value in test1.items()}
test6['img_prefix'] = test_img_prefix6
test6['ann_file'] = test_ann_file6
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
train=dict(
type='ConcatDataset',
datasets=[
train1, train2, train3, train4, train5, train6, train7, train8
]),
val=dict(
type='ConcatDataset',
datasets=[test1, test2, test3, test4, test5, test6]),
test=dict(
type='ConcatDataset',
datasets=[test1, test2, test3, test4, test5, test6]))
evaluation = dict(interval=1, metric='acc')

View File

@ -0,0 +1,43 @@
# SegOCR Simple Baseline.
## Introduction
[ALGORITHM]
```bibtex
@unpublished{key,
title={SegOCR Simple Baseline.},
author={},
note={Unpublished Manuscript},
year={2021}
}
```
## Dataset
### Train Dataset
| trainset | instance_num | repeat_num | source |
| :-------: | :----------: | :--------: | :----: |
| SynthText | 7266686 | 1 | synth |
### Test Dataset
| testset | instance_num | type |
| :-----: | :----------: | :-------: |
| IIIT5K | 3000 | regular |
| SVT | 647 | regular |
| IC13 | 1015 | regular |
| CT80 | 288 | irregular |
## Results and Models
(IIIT5K, SVT and IC13 are regular-text benchmarks; CT80 is irregular.)

| Backbone | Neck | Head | IIIT5K | SVT | IC13 | CT80 | download |
| :------: | :--: | :--: | :----: | :-: | :--: | :--: | :------: |
| R31-1/16 | FPNOCR | 1x | 90.9 | 81.8 | 90.7 | 80.9 | [model](https://download.openmmlab.com/mmocr/textrecog/seg/seg_r31_1by16_fpnocr_academic-72235b11.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/seg/20210325_112835.log.json) |
**Notes:**
- `R31-1/16` means the size (both height and width) of the feature map from the backbone is 1/16 of the input image.
- `1x` means the size (both height and width) of the feature map from the head is the same as that of the input image.

View File

@ -0,0 +1,160 @@
_base_ = ['../../_base_/default_runtime.py']
# optimizer
optimizer = dict(type='Adam', lr=1e-4)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='step', step=[3, 4])
total_epochs = 5
label_convertor = dict(
type='SegConvertor', dict_type='DICT36', with_unknown=True, lower=True)
model = dict(
type='SegRecognizer',
backbone=dict(
type='ResNet31OCR',
layers=[1, 2, 5, 3],
channels=[32, 64, 128, 256, 512, 512],
out_indices=[0, 1, 2, 3],
stage4_pool_cfg=dict(kernel_size=2, stride=2),
last_stage_pool=True),
neck=dict(
type='FPNOCR', in_channels=[128, 256, 512, 512], out_channels=256),
head=dict(
type='SegHead',
in_channels=256,
upsample_param=dict(scale_factor=2.0, mode='nearest')),
loss=dict(
type='SegLoss', seg_downsample_ratio=1.0, seg_with_loss_weight=True),
label_convertor=label_convertor)
find_unused_parameters = True
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
gt_label_convertor = dict(
type='SegConvertor', dict_type='DICT36', with_unknown=True, lower=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomPaddingOCR',
max_ratio=[0.15, 0.2, 0.15, 0.2],
box_type='char_quads'),
dict(type='OpencvToPil'),
dict(
type='RandomRotateImageBox',
min_angle=-17,
max_angle=17,
box_type='char_quads'),
dict(type='PilToOpencv'),
dict(
type='ResizeOCR',
height=64,
min_width=64,
max_width=512,
keep_aspect_ratio=True),
dict(
type='OCRSegTargets',
label_convertor=gt_label_convertor,
box_type='char_quads'),
dict(type='RandomRotateTextDet', rotate_ratio=0.5, max_angle=15),
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
dict(type='ToTensorOCR'),
dict(type='FancyPCA'),
dict(type='NormalizeOCR', **img_norm_cfg),
dict(
type='CustomFormatBundle',
keys=['gt_kernels'],
visualize=dict(flag=False, boundary_key=None),
call_super=False),
dict(
type='Collect',
keys=['img', 'gt_kernels'],
meta_keys=['filename', 'ori_shape', 'img_shape'])
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeOCR',
height=64,
min_width=64,
max_width=None,
keep_aspect_ratio=True),
dict(type='ToTensorOCR'),
dict(type='NormalizeOCR', **img_norm_cfg),
dict(type='CustomFormatBundle', call_super=False),
dict(
type='Collect',
keys=['img'],
meta_keys=['filename', 'ori_shape', 'img_shape'])
]
train_img_root = 'data/mixture/'
train_img_prefix = train_img_root + 'SynthText'
train_ann_file = train_img_root + 'SynthText/instances_train.txt'
train = dict(
type='OCRSegDataset',
img_prefix=train_img_prefix,
ann_file=train_ann_file,
loader=dict(
type='HardDiskLoader',
repeat=1,
parser=dict(
type='LineJsonParser', keys=['file_name', 'annotations', 'text'])),
pipeline=train_pipeline,
test_mode=False)
dataset_type = 'OCRDataset'
test_prefix = 'data/mixture/'
test_img_prefix1 = test_prefix + 'IIIT5K/'
test_img_prefix2 = test_prefix + 'svt/'
test_img_prefix3 = test_prefix + 'icdar_2013/'
test_img_prefix4 = test_prefix + 'ct80/'
test_ann_file1 = test_prefix + 'IIIT5K/test_label.txt'
test_ann_file2 = test_prefix + 'svt/test_label.txt'
test_ann_file3 = test_prefix + 'icdar_2013/test_label_1015.txt'
test_ann_file4 = test_prefix + 'ct80/test_label.txt'
test1 = dict(
type=dataset_type,
img_prefix=test_img_prefix1,
ann_file=test_ann_file1,
loader=dict(
type='HardDiskLoader',
repeat=1,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=test_pipeline,
test_mode=True)
test2 = {key: value for key, value in test1.items()}
test2['img_prefix'] = test_img_prefix2
test2['ann_file'] = test_ann_file2
test3 = {key: value for key, value in test1.items()}
test3['img_prefix'] = test_img_prefix3
test3['ann_file'] = test_ann_file3
test4 = {key: value for key, value in test1.items()}
test4['img_prefix'] = test_img_prefix4
test4['ann_file'] = test_ann_file4
data = dict(
samples_per_gpu=16,
workers_per_gpu=2,
train=dict(type='ConcatDataset', datasets=[train]),
val=dict(type='ConcatDataset', datasets=[test1, test2, test3, test4]),
test=dict(type='ConcatDataset', datasets=[test1, test2, test3, test4]))
evaluation = dict(interval=1, metric='acc')

View File

@ -0,0 +1,35 @@
_base_ = [
'../../_base_/default_runtime.py',
'../../_base_/recog_datasets/seg_toy_dataset.py'
]
# optimizer
optimizer = dict(type='Adam', lr=1e-4)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='step', step=[3, 4])
total_epochs = 5
label_convertor = dict(
type='SegConvertor', dict_type='DICT36', with_unknown=True, lower=True)
model = dict(
type='SegRecognizer',
backbone=dict(
type='ResNet31OCR',
layers=[1, 2, 5, 3],
channels=[32, 64, 128, 256, 512, 512],
out_indices=[0, 1, 2, 3],
stage4_pool_cfg=dict(kernel_size=2, stride=2),
last_stage_pool=True),
neck=dict(
type='FPNOCR', in_channels=[128, 256, 512, 512], out_channels=256),
head=dict(
type='SegHead',
in_channels=256,
upsample_param=dict(scale_factor=2.0, mode='nearest')),
loss=dict(
type='SegLoss', seg_downsample_ratio=1.0, seg_with_loss_weight=False),
label_convertor=label_convertor)
find_unused_parameters = True


44
demo/image_demo.py 100644
View File

@ -0,0 +1,44 @@
from argparse import ArgumentParser
import mmcv
from mmdet.apis import init_detector
from mmocr.apis.inference import model_inference
from mmocr.datasets import build_dataset # noqa: F401
from mmocr.models import build_detector # noqa: F401
def main():
parser = ArgumentParser()
parser.add_argument('img', help='Image file.')
parser.add_argument('config', help='Config file.')
parser.add_argument('checkpoint', help='Checkpoint file.')
parser.add_argument('save_path', help='Path to save visualized image.')
parser.add_argument(
'--device', default='cuda:0', help='Device used for inference.')
parser.add_argument(
'--imshow',
action='store_true',
help='Whether show image with OpenCV.')
args = parser.parse_args()
# build the model from a config file and a checkpoint file
model = init_detector(args.config, args.checkpoint, device=args.device)
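    # For ConcatDataset, the test pipeline is defined per sub-dataset, so reuse the first one.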
if model.cfg.data.test['type'] == 'ConcatDataset':
model.cfg.data.test.pipeline = model.cfg.data.test['datasets'][
0].pipeline
# test a single image
result = model_inference(model, args.img)
print(f'result: {result}')
# show the results
img = model.show_result(args.img, result, out_file=None, show=False)
mmcv.imwrite(img, args.save_path)
if args.imshow:
mmcv.imshow(img, 'predicted results')
if __name__ == '__main__':
main()
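For reference, a minimal programmatic sketch of the same inference flow (the config, checkpoint and image paths below are placeholders):

```python
from mmdet.apis import init_detector
from mmocr.apis.inference import model_inference

# placeholder paths; substitute a real config, checkpoint and image
model = init_detector('path/to/config.py', 'path/to/checkpoint.pth', device='cuda:0')
result = model_inference(model, 'path/to/image.jpg')
print(result)
```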

View File

@ -0,0 +1,52 @@
import argparse
import cv2
import torch
from mmdet.apis import init_detector
from mmocr.apis import model_inference
from mmocr.datasets import build_dataset # noqa: F401
from mmocr.models import build_detector # noqa: F401
def parse_args():
    parser = argparse.ArgumentParser(description='MMOCR webcam demo.')
parser.add_argument('config', help='Test config file path.')
parser.add_argument('checkpoint', help='Checkpoint file.')
parser.add_argument(
'--device', type=str, default='cuda:0', help='CPU/CUDA device option.')
parser.add_argument(
'--camera-id', type=int, default=0, help='Camera device id.')
parser.add_argument(
'--score-thr', type=float, default=0.5, help='Bbox score threshold.')
args = parser.parse_args()
return args
def main():
args = parse_args()
device = torch.device(args.device)
model = init_detector(args.config, args.checkpoint, device=device)
if model.cfg.data.test['type'] == 'ConcatDataset':
model.cfg.data.test.pipeline = model.cfg.data.test['datasets'][
0].pipeline
camera = cv2.VideoCapture(args.camera_id)
print('Press "Esc", "q" or "Q" to exit.')
while True:
ret_val, img = camera.read()
result = model_inference(model, img)
ch = cv2.waitKey(1)
if ch == 27 or ch == ord('q') or ch == ord('Q'):
break
model.show_result(
img, result, score_thr=args.score_thr, wait_time=1, show=True)
if __name__ == '__main__':
main()

28
docker/Dockerfile 100644
View File

@ -0,0 +1,28 @@
ARG PYTORCH="1.5"
ARG CUDA="10.1"
ARG CUDNN="7"
FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX"
ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../"
RUN apt-get update && apt-get install -y git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
RUN conda clean --all
RUN pip install mmcv-full==1.2.6+torch1.5.0+cu101 -f https://download.openmmlab.com/mmcv/dist/index.html
RUN git clone https://github.com/open-mmlab/mmdetection.git /mmdet
WORKDIR /mmdet
RUN git checkout -b v2.9.0 v2.9.0
RUN pip install -r requirements.txt
RUN pip install .
RUN git clone https://github.com/open-mmlab/mmocr.git /mmocr
WORKDIR /mmocr
ENV FORCE_CUDA="1"
RUN pip install -r requirements.txt
RUN pip install --no-cache-dir -e .

20
docs/Makefile 100644
View File

@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

15
docs/api.rst 100644
View File

@ -0,0 +1,15 @@
API Reference
=============
mmocr.apis
-------------
.. automodule:: mmocr.apis
:members:
mmocr.core
-------------
evaluation
^^^^^^^^^^
.. automodule:: mmocr.core.evaluation
:members:

View File

@ -0,0 +1 @@
## Changelog

View File

@ -0,0 +1,93 @@
<a id="markdown-contributor-covenant-code-of-conduct" name="contributor-covenant-code-of-conduct"></a>
# Contributor Covenant Code of Conduct
<!-- TOC -->
- [Contributor Covenant Code of Conduct](#contributor-covenant-code-of-conduct)
- [Our Pledge](#our-pledge)
- [Our Standards](#our-standards)
- [Our Responsibilities](#our-responsibilities)
- [Scope](#scope)
- [Enforcement](#enforcement)
- [Attribution](#attribution)
<!-- /TOC -->
<a id="markdown-our-pledge" name="our-pledge"></a>
## Our Pledge
In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to making participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.
<a id="markdown-our-standards" name="our-standards"></a>
## Our Standards
Examples of behavior that contributes to creating a positive environment
include:
* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members
Examples of unacceptable behavior by participants include:
* The use of sexualized language or imagery and unwelcome sexual attention or
advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
<a id="markdown-our-responsibilities" name="our-responsibilities"></a>
## Our Responsibilities
Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.
Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.
<a id="markdown-scope" name="scope"></a>
## Scope
This Code of Conduct applies both within project spaces and in public spaces
when an individual is representing the project or its community. Examples of
representing a project or community include using an official project e-mail
address, posting via an official social media account, or acting as an appointed
representative at an online or offline event. Representation of a project may be
further defined and clarified by project maintainers.
<a id="markdown-enforcement" name="enforcement"></a>
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at chenkaidev@gmail.com. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.
Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.
<a id="markdown-attribution" name="attribution"></a>
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see
https://www.contributor-covenant.org/faq

83
docs/conf.py 100644
View File

@ -0,0 +1,83 @@
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
import os
import subprocess
import sys
sys.path.insert(0, os.path.abspath('..'))
# -- Project information -----------------------------------------------------
project = 'MMOCR'
copyright = '2020-2030, OpenMMLab'
author = 'OpenMMLab'
# The full version, including alpha/beta/rc tags
release = '0.1.0'
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.napoleon',
'sphinx.ext.viewcode',
'recommonmark',
'sphinx_markdown_tables',
]
autodoc_mock_imports = ['torch', 'torchvision', 'mmcv', 'mmocr.version']
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
source_suffix = {
'.rst': 'restructuredtext',
'.md': 'markdown',
}
# The master toctree document.
master_doc = 'index'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'
master_doc = 'index'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = []
def builder_inited_handler(app):
subprocess.run(['./merge_docs.sh'])
subprocess.run(['./stats.py'])
def setup(app):
app.connect('builder-inited', builder_inited_handler)

View File

@ -0,0 +1,187 @@
<a id="markdown-contributing-to-mmocr" name="contributing-to-mmocr"></a>
# Contributing to mmocr
All kinds of contributions are welcome, including but not limited to the following.
- Fixes (typo, bugs)
- New features and components
- Enhancement like function speedup
<!-- TOC -->
- [Contributing to mmocr](#contributing-to-mmocr)
- [Workflow](#workflow)
- [Step 1: Create a Fork](#step-1-create-a-fork)
- [Step 2: Develop a new feature](#step-2-develop-a-new-feature)
- [Step 2.1: Keep your fork up to date](#step-21-keep-your-fork-up-to-date)
- [<span id = "step2.2">Step 2.2: Create a feature branch</span>](#step-22-create-a-feature-branch)
- [Create an issue on github](#create-an-issue-on-github)
- [Create branch](#create-branch)
- [Step 2.3: Develop and test <your_new_feature>](#step-23-develop-and-test-your_new_feature)
- [Step 2.4: Prepare to Pull Request](#step-24-prepare-to-pull-request)
- [Merge official repo updates to your fork](#merge-official-repo-updates-to-your-fork)
- [Push <your_new_feature> branch to your remote forked repo,](#push-your_new_feature-branch-to-your-remote-forked-repo)
- [Step 2.5: Create a Pull Request](#step-25-create-a-pull-request)
- [Step 2.6: Review code](#step-26-review-code)
- [Step 2.7: Revise <your_new_feature> (optional)](#step-27-revise-your_new_feature--optional)
- [Step 2.8: Delete <your_new_feature> branch if your PR is accepted.](#step-28-delete-your_new_feature-branch-if-your-pr-is-accepted)
- [Code style](#code-style)
- [Python](#python)
- [C++ and CUDA](#c-and-cuda)
<!-- /TOC -->
<a id="markdown-workflow" name="workflow"></a>
## Workflow
This document describes the fork & merge request workflow that should be used when contributing to **MMOCR**.
The official public [repository](https://github.com/open-mmlab/mmocr) holds two branches with an infinite lifetime only:
+ master
+ develop
The *master* branch is the main branch where the source code of **HEAD** always reflects a *production-ready state*.
The *develop* branch is the branch where the source code of **HEAD** always reflects a state with the latest development changes for the next release.
Feature branches are used to develop new features for the upcoming or a distant future release.
![](res/git-workflow-master-develop.png)
All new contributors to **MMOCR** should follow these steps:
<a id="markdown-step-1-create-a-fork" name="step-1-create-a-fork"></a>
### Step 1: Create a Fork
1. Fork the repo on GitHub or GitLab to your personal account. Click the `Fork` button on the [project page](https://github.com/open-mmlab/mmocr).
2. Clone your new forked repo to your computer.
```
git clone https://github.com/<your name>/mmocr.git
```
3. Add the official repo as an upstream:
```
git remote add upstream https://github.com/open-mmlab/mmocr.git
```
<a id="markdown-step-2-develop-a-new-feature" name="step-2-develop-a-new-feature"></a>
### Step 2: Develop a new feature
<a id="markdown-step-21-keep-your-fork-up-to-date" name="step-21-keep-your-fork-up-to-date"></a>
#### Step 2.1: Keep your fork up to date
Whenever you want to update your fork with the latest upstream changes, you need to fetch the upstream repo's branches and latest commits to bring them into your repository:
```
# Fetch from upstream remote
git fetch upstream
# Update your master branch
git checkout master
git rebase upstream/master
git push origin master
# Update your develop branch
git checkout develop
git rebase upstream/develop
git push origin develop
```
<a id="markdown-span-id--step22step-22-create-a-feature-branchspan" name="span-id--step22step-22-create-a-feature-branchspan"></a>
#### <span id = "step2.2">Step 2.2: Create a feature branch</span>
<a id="markdown-create-an-issue-on-githubhttpsgithubcomopen-mmlabmmocr" name="create-an-issue-on-githubhttpsgithubcomopen-mmlabmmocr"></a>
##### Create an issue on [github](https://github.com/open-mmlab/mmocr)
- The title of the issue should be one of the following formats: `[Feature]: xxx`, `[Fix]: xxx`, `[Enhance]: xxx`, `[Refactor]: xxx`.
- More details can be written in comments.
<a id="markdown-create-branch" name="create-branch"></a>
##### Create branch
```
git checkout -b feature/iss_<index> develop
# index is the issue number above
```
Your fork now has three branches, as follows:
![](res/git-workflow-feature.png)
<a id="markdown-step-23-develop-and-test-your_new_feature" name="step-23-develop-and-test-your_new_feature"></a>
#### Step 2.3: Develop and test <your_new_feature>
Develop your new feature and test it to make sure it works well.
Please run
```
pre-commit run --all-files
pytest tests
```
and fix all failures before every git commit.
```
git commit -m "fix #<issue_index>: <commit_message>"
```
**Note:**
- <issue_index> is the [issue](#step2.2) number.
<a id="markdown-step-24-prepare-to-pull-request" name="step-24-prepare-to-pull-request"></a>
#### Step 2.4: Prepare to Pull Request
- Make sure to link your pull request to the related issue. Please refer to the [instruction](https://docs.github.com/en/github/managing-your-work-on-github/linking-a-pull-request-to-an-issue)
<a id="markdown-merge-official-repo-updates-to-your-fork" name="merge-official-repo-updates-to-your-fork"></a>
##### Merge official repo updates to your fork
```
# fetch from upstream remote. i.e., the official repo
git fetch upstream
# update the develop branch of your fork
git checkout develop
git rebase upstream/develop
git push origin develop
# update the <your_new_feature> branch
git checkout <your_new_feature>
git rebase develop
# solve conflicts if any and Test
```
<a id="markdown-push-your_new_feature-branch-to-your-remote-forked-repo" name="push-your_new_feature-branch-to-your-remote-forked-repo"></a>
##### Push <your_new_feature> branch to your remote forked repo,
```
git checkout <your_new_feature>
git push origin <your_new_feature>
```
<a id="markdown-step-25-create-a-pull-request" name="step-25-create-a-pull-request"></a>
#### Step 2.5: Create a Pull Request
Go to the page for your fork on GitHub, select your new feature branch, and click the pull request button to integrate your feature branch into the upstream remote's develop branch.
<a id="markdown-step-26-review-code" name="step-26-review-code"></a>
#### Step 2.6: Review code
<a id="markdown-step-27-revise-your_new_feature--optional" name="step-27-revise-your_new_feature--optional"></a>
#### Step 2.7: Revise <your_new_feature> (optional)
If your PR is not accepted, please follow Steps 2.1, 2.3, 2.4 and 2.5 until it is accepted.
<a id="markdown-step-28-delete-your_new_feature-branch-if-your-pr-is-accepted" name="step-28-delete-your_new_feature-branch-if-your-pr-is-accepted"></a>
#### Step 2.8: Delete <your_new_feature> branch if your PR is accepted.
```
git branch -d <your_new_feature>
git push origin :<your_new_feature>
```
<a id="markdown-code-style" name="code-style"></a>
## Code style
<a id="markdown-python" name="python"></a>
### Python
We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style.
We use the following tools for linting and formatting:
- [flake8](http://flake8.pycqa.org/en/latest/): linter
- [yapf](https://github.com/google/yapf): formatter
- [isort](https://github.com/timothycrosley/isort): sort imports
>Before you create a PR, make sure that your code lints and is formatted by yapf.
<a id="markdown-c-and-cuda" name="c-and-cuda"></a>
### C++ and CUDA
We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html).

208
docs/datasets.md 100644
View File

@ -0,0 +1,208 @@
<a id="markdown-datasets-preparation" name="datasets-preparation"></a>
# Datasets Preparation
This page lists the datasets which are commonly used in text detection, text recognition and key information extraction, and their download links.
<!-- TOC -->
- [Datasets Preparation](#datasets-preparation)
- [Text Detection](#text-detection)
- [Text Recognition](#text-recognition)
- [Key Information Extraction](#key-information-extraction)
<!-- /TOC -->
<a id="markdown-text-detection" name="text-detection"></a>
## Text Detection
**The structure of the text detection dataset directory is organized as follows.**
```
├── ctw1500
│   ├── imgs
│   ├── instances_test.json
│   └── instances_training.json
├── icdar2015
│   ├── imgs
│   ├── instances_test.json
│   └── instances_training.json
├── icdar2017
│   ├── imgs
│   ├── instances_training.json
│   └── instances_val.json
├── synthtext
│   ├── imgs
│   └── instances_training.lmdb
```
| Dataset | Images | Annotation Files (training) | Annotation Files (validation) | Annotation Files (testing) | Note |
|:---------:|:--------------------------:|:--------------------------------------------:|:---------------------------------------:|:----------------------------------------:|:----:|
| CTW1500 | [homepage](https://github.com/Yuliang-Liu/Curve-Text-Detector) | [instances_training.json](https://download.openmmlab.com/mmocr/data/ctw1500/instances_training.json) | - | [instances_test.json](https://download.openmmlab.com/mmocr/data/ctw1500/instances_test.json) | |
| ICDAR2015 | [homepage](https://rrc.cvc.uab.es/?ch=4&com=downloads) | [instances_training.json](https://download.openmmlab.com/mmocr/data/icdar2015/instances_training.json) | - | [instances_test.json](https://download.openmmlab.com/mmocr/data/icdar2015/instances_test.json) | |
| ICDAR2017 | [homepage](https://rrc.cvc.uab.es/?ch=8&com=downloads) \| [renamed_imgs](https://download.openmmlab.com/mmocr/data/icdar2017/renamed_imgs.tar) | [instances_training.json](https://download.openmmlab.com/mmocr/data/icdar2017/instances_training.json) | [instances_val.json](https://download.openmmlab.com/mmocr/data/icdar2017/instances_val.json) | [instances_test.json](https://download.openmmlab.com/mmocr/data/icdar2017/instances_test.json) | |
| Synthtext | [homepage](https://www.robots.ox.ac.uk/~vgg/data/scenetext/) | [instances_training.lmdb](https://download.openmmlab.com/mmocr/data/synthtext/instances_training.lmdb) | - | - | |
- For `icdar2015`:
- Step1: Download `ch4_training_images.zip` and `ch4_test_images.zip` from [homepage](https://rrc.cvc.uab.es/?ch=4&com=downloads)
- Step2: Download [instances_training.json](https://download.openmmlab.com/mmocr/data/icdar2015/instances_training.json) and [instances_test.json](https://download.openmmlab.com/mmocr/data/icdar2015/instances_test.json)
- Step3:
```bash
mkdir icdar2015 && cd icdar2015
mv /path/to/instances_training.json .
mv /path/to/instances_test.json .
mkdir imgs && cd imgs
ln -s /path/to/ch4_training_images training
ln -s /path/to/ch4_test_images test
```
- For `icdar2017`:
- To avoid rotation effects when loading `jpg` images with OpenCV, we provide re-saved images in `png` format in [renamed_images](https://download.openmmlab.com/mmocr/data/icdar2017/renamed_imgs.tar). You can copy these images to `imgs`.
<a id="markdown-text-recognition" name="text-recognition"></a>
## Text Recognition
**The structure of the text recognition dataset directory is organized as follows.**
```
├── mixture
│   ├── coco_text
│ │ ├── train_label.txt
│ │ ├── train_words
│   ├── icdar_2011
│ │ ├── training_label.txt
│ │ ├── Challenge1_Training_Task3_Images_GT
│   ├── icdar_2013
│ │ ├── train_label.txt
│ │ ├── test_label_1015.txt
│ │ ├── test_label_1095.txt
│ │ ├── Challenge2_Training_Task3_Images_GT
│ │ ├── Challenge2_Test_Task3_Images
│   ├── icdar_2015
│ │ ├── train_label.txt
│ │ ├── test_label.txt
│ │ ├── ch4_training_word_images_gt
│ │ ├── ch4_test_word_images_gt
│   ├── IIIT5K
│ │ ├── train_label.txt
│ │ ├── test_label.txt
│ │ ├── train
│ │ ├── test
│   ├── ct80
│ │ ├── test_label.txt
│ │ ├── image
│   ├── svt
│ │ ├── test_label.txt
│ │ ├── image
│   ├── svtp
│ │ ├── test_label.txt
│ │ ├── image
│   ├── Syn90k
│ │ ├── shuffle_labels.txt
│ │ ├── label.lmdb
│ │ ├── mnt
│   ├── SynthText
│ │ ├── shuffle_labels.txt
│ │ ├── instances_train.txt
│ │ ├── label.lmdb
│ │ ├── synthtext
│   ├── SynthAdd
│ │ ├── label.txt
│ │ ├── SynthText_Add
```
| Dataset | images | annotation file (training) | annotation file (test) | Note |
|:----------:|:---------------------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------------:|:-------------------------------------------------------------------------------------------------------:|:----:|
| coco_text | [homepage](https://rrc.cvc.uab.es/?ch=5&com=downloads) | [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/coco_text/train_label.txt) | - | |
| icdar_2011 | [homepage](http://www.cvc.uab.es/icdar2011competition/?com=downloads) | [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2015/train_label.txt) | - | |
| icdar_2013 | [homepage](https://rrc.cvc.uab.es/?ch=2&com=downloads) | [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2013/train_label.txt) | [test_label_1015.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2013/test_label_1015.txt) | |
| icdar_2015 | [homepage](https://rrc.cvc.uab.es/?ch=4&com=downloads) | [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2015/train_label.txt) | [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2015/test_label.txt) | |
| IIIT5K | [homepage](http://cvit.iiit.ac.in/projects/SceneTextUnderstanding/IIIT5K.html) | [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/IIIT5K/train_label.txt) | [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/IIIT5K/test_label.txt) | |
| ct80 | - | - | [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/ct80/test_label.txt) | |
| svt | [homepage](http://www.iapr-tc11.org/mediawiki/index.php/The_Street_View_Text_Dataset) | - | [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/svt/test_label.txt) | |
| svtp | - | - | [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/svtp/test_label.txt) | |
| Synth90k | [homepage](https://www.robots.ox.ac.uk/~vgg/data/text/) | [shuffle_labels.txt](https://download.openmmlab.com/mmocr/data/mixture/Synth90k/shuffle_labels.txt) \| [label.lmdb](https://download.openmmlab.com/mmocr/data/mixture/Synth90k/label.lmdb) | - | |
| SynthText | [homepage](https://www.robots.ox.ac.uk/~vgg/data/scenetext/) | [shuffle_labels.txt](https://download.openmmlab.com/mmocr/data/mixture/SynthText/shuffle_labels.txt) \| [instances_train.txt](https://download.openmmlab.com/mmocr/data/mixture/SynthText/instances_train.txt) \| [label.lmdb](https://download.openmmlab.com/mmocr/data/mixture/SynthText/label.lmdb) | - | |
| SynthAdd | [SynthText_Add.zip](https://pan.baidu.com/s/1uV0LtoNmcxbO-0YA7Ch4dg) (code: 627x) | [label.txt](https://download.openmmlab.com/mmocr/data/mixture/SynthAdd/label.txt) | - | |
- For `icdar_2013`:
- Step1: Download `Challenge2_Test_Task3_Images.zip` and `Challenge2_Training_Task3_Images_GT.zip` from [homepage](https://rrc.cvc.uab.es/?ch=2&com=downloads)
- Step2: Download [test_label_1015.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2013/test_label_1015.txt) and [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2013/train_label.txt)
- For `icdar_2015`:
- Step1: Download `ch4_training_word_images_gt.zip` and `ch4_test_word_images_gt.zip` from [homepage](https://rrc.cvc.uab.es/?ch=4&com=downloads)
- Step2: Download [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2015/train_label.txt) and [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2015/test_label.txt)
- For `IIIT5K`:
- Step1: Download `IIIT5K-Word_V3.0.tar.gz` from [homepage](http://cvit.iiit.ac.in/projects/SceneTextUnderstanding/IIIT5K.html)
- Step2: Download [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/IIIT5K/train_label.txt) and [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/IIIT5K/test_label.txt)
- For `svt`:
- Step1: Download `svt.zip` from [homepage](http://www.iapr-tc11.org/mediawiki/index.php/The_Street_View_Text_Dataset)
- Step2: Download [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/svt/test_label.txt)
- For `ct80`:
- Step1: Download [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/ct80/test_label.txt)
- For `svtp`:
- Step1: Download [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/svtp/test_label.txt)
- For `coco_text`:
- Step1: Download from [homepage](https://rrc.cvc.uab.es/?ch=5&com=downloads)
- Step2: Download [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/coco_text/train_label.txt)
- For `Syn90k`:
- Step1: Download `mjsynth.tar.gz` from [homepage](https://www.robots.ox.ac.uk/~vgg/data/text/)
- Step2: Download [shuffle_labels.txt](https://download.openmmlab.com/mmocr/data/mixture/Synth90k/shuffle_labels.txt)
- Step3:
```bash
mkdir Syn90k && cd Syn90k
mv /path/to/mjsynth.tar.gz .
tar -xzf mjsynth.tar.gz
mv /path/to/shuffle_labels.txt .
# create soft link
cd /path/to/mmocr/data/mixture
ln -s /path/to/Syn90k Syn90k
```
- For `SynthText`:
- Step1: Download `SynthText.zip` from [homepage](https://www.robots.ox.ac.uk/~vgg/data/scenetext/)
- Step2: Download [shuffle_labels.txt](https://download.openmmlab.com/mmocr/data/mixture/SynthText/shuffle_labels.txt)
- Step3: Download [instances_train.txt](https://download.openmmlab.com/mmocr/data/mixture/SynthText/instances_train.txt)
- Step4:
```bash
unzip SynthText.zip
cd SynthText
mv /path/to/shuffle_labels.txt .
mv /path/to/instances_train.txt .
# create soft link
cd /path/to/mmocr/data/mixture
ln -s /path/to/SynthText SynthText
```
- For `SynthAdd`:
- Step1: Download `SynthText_Add.zip` from [SynthAdd](https://pan.baidu.com/s/1uV0LtoNmcxbO-0YA7Ch4dg) (code: 627x)
- Step2: Download [label.txt](https://download.openmmlab.com/mmocr/data/mixture/SynthAdd/label.txt)
- Step3:
```bash
mkdir SynthAdd && cd SynthAdd
mv /path/to/SynthText_Add.zip .
unzip SynthText_Add.zip
mv /path/to/label.txt .
# create soft link
cd /path/to/mmocr/data/mixture
ln -s /path/to/SynthAdd SynthAdd
```
<a id="markdown-key-information-extraction" name="key-information-extraction"></a>
## Key Information Extraction
**The structure of the key information extraction dataset directory is organized as follows.**
```
└── wildreceipt
├── anno_files
├── class_list.txt
├── dict.txt
├── image_files
├── test.txt
└── train.txt
```
- Download [wildreceipt.tar](https://download.openmmlab.com/mmocr/data/wildreceipt.tar)
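As a minimal sketch, the archive can be unpacked with Python's `tarfile`; this assumes the tarball contains a top-level `wildreceipt/` folder matching the layout above, which should be verified after extraction:
```python
# Minimal extraction sketch (assumption: the archive unpacks to a
# top-level wildreceipt/ directory matching the layout shown above).
import tarfile

with tarfile.open('wildreceipt.tar') as tar:
    tar.extractall(path='data/')  # yields data/wildreceipt/...
```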
369
docs/getting_started.md 100644
@ -0,0 +1,369 @@
<a id="markdown-getting-started" name="getting-started"></a>
# Getting Started
This page provides basic tutorials on the usage of MMOCR.
For the installation instructions, please see [install.md](install.md).
<!-- TOC -->
- [Getting Started](#getting-started)
- [Inference with Pretrained Models](#inference-with-pretrained-models)
- [Test a Single Image](#test-a-single-image)
- [Test Multiple Images](#test-multiple-images)
- [Test a Dataset](#test-a-dataset)
- [Test with Single/Multiple GPUs](#test-with-singlemultiple-gpus)
- [Optional Arguments](#optional-arguments)
- [Test with Slurm](#test-with-slurm)
- [Optional Arguments](#optional-arguments-1)
- [Train a Model](#train-a-model)
- [Train with Single/Multiple GPUs](#train-with-singlemultiple-gpus)
- [Train with Toy Dataset](#train-with-toy-dataset)
- [Train with Slurm](#train-with-slurm)
- [Launch Multiple Jobs on a Single Machine](#launch-multiple-jobs-on-a-single-machine)
- [Useful Tools](#useful-tools)
- [Publish a Model](#publish-a-model)
- [Customized Settings](#customized-settings)
- [Flexible Dataset](#flexible-dataset)
- [Encoder-Decoder-Based Text Recognition Task](#encoder-decoder-based-text-recognition-task)
- [Optional Arguments:](#optional-arguments-2)
- [Segmentation-Based Text Recognition Task](#segmentation-based-text-recognition-task)
- [Text Detection Task](#text-detection-task)
- [COCO-like Dataset](#coco-like-dataset)
<!-- /TOC -->
<a id="markdown-inference-with-pretrained-models" name="inference-with-pretrained-models"></a>
## Inference with Pretrained Models
We provide testing scripts to evaluate a full dataset, as well as some task-specific image demos.
<a id="markdown-test-a-single-image" name="test-a-single-image"></a>
### Test a Single Image
You can use the following command to test a single image with one GPU.
```shell
python demo/image_demo.py ${TEST_IMG} ${CONFIG_FILE} ${CHECKPOINT_FILE} ${SAVE_PATH} [--imshow] [--device ${GPU_ID}]
```
If `--imshow` is specified, the demo will also show the image with OpenCV. For example:
```shell
python demo/image_demo.py demo/demo_text_det.jpg configs/xxx.py xxx.pth demo/demo_text_det_pred.jpg
```
The predicted result will be saved as `demo/demo_text_det_pred.jpg`.
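The same prediction can also be obtained programmatically through the `model_inference` API. Below is a minimal sketch; it assumes the detector is built with mmdet's `init_detector`, that the registration imports behave like those in the demo script, and that the config/checkpoint paths are placeholders for real files:
```python
# A minimal sketch of single-image inference via the Python API.
# Assumptions: mmdet's init_detector builds the model, and the config
# and checkpoint paths below are placeholders for real files.
from mmdet.apis import init_detector

from mmocr.apis import model_inference
from mmocr.datasets import build_dataset  # noqa: F401 (assumed to register datasets)
from mmocr.models import build_detector  # noqa: F401 (assumed to register models)

model = init_detector('configs/xxx.py', 'xxx.pth', device='cuda:0')
result = model_inference(model, 'demo/demo_text_det.jpg')
print(result)  # e.g. a dict with a 'boundary_result' key for detection
```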
<a id="markdown-test-multiple-images" name="test-multiple-images"></a>
### Test Multiple Images
```shell
# for text detection
sh tools/test_imgs.sh ${CONFIG_FILE} ${CHECKPOINT_FILE} ${IMG_ROOT_PATH} ${IMG_LIST} ${RESULTS_DIR}
# for text recognition
sh tools/ocr_test_imgs.sh ${CONFIG_FILE} ${CHECKPOINT_FILE} ${IMG_ROOT_PATH} ${IMG_LIST} ${RESULTS_DIR}
```
It will save both the prediction results and visualized images to `${RESULTS_DIR}`.
<a id="markdown-test-a-dataset" name="test-a-dataset"></a>
### Test a Dataset
MMOCR implements **distributed** testing with `MMDistributedDataParallel`. (Please refer to [datasets.md](datasets.md) to prepare your datasets)
<a id="markdown-test-with-singlemultiple-gpus" name="test-with-singlemultiple-gpus"></a>
#### Test with Single/Multiple GPUs
You can use the following command to test a dataset with single/multiple GPUs.
```shell
./tools/dist_test.sh ${CONFIG_FILE} ${CHECKPOINT_FILE} ${GPU_NUM} [--eval ${EVAL_METRIC}]
```
For example,
```shell
./tools/dist_test.sh configs/example_config.py work_dirs/example_exp/example_model_20200202.pth 1 --eval hmean-iou
```
<a id="markdown-optional-arguments" name="optional-arguments"></a>
##### Optional Arguments
- `--eval`: Specify the evaluation metric. For text detection, the metric should be either 'hmean-ic13' or 'hmean-iou'. For text recognition, the metric should be 'acc'.
<a id="markdown-test-with-slurm" name="test-with-slurm"></a>
#### Test with Slurm
If you run MMOCR on a cluster managed with [Slurm](https://slurm.schedmd.com/), you can use the script `slurm_test.sh`.
```shell
[GPUS=${GPUS}] ./tools/slurm_test.sh ${PARTITION} ${JOB_NAME} ${CONFIG_FILE} ${CHECKPOINT_FILE} [--eval ${EVAL_METRIC}]
```
Here is an example of using 8 GPUs to test an example model on the 'dev' partition with job name 'test_job'.
```shell
GPUS=8 ./tools/slurm_test.sh dev test_job configs/example_config.py work_dirs/example_exp/example_model_20200202.pth --eval hmean-iou
```
You can check [slurm_test.sh](https://github.com/open-mmlab/mmocr/blob/master/tools/slurm_test.sh) for full arguments and environment variables.
<a id="markdown-optional-arguments-1" name="optional-arguments-1"></a>
##### Optional Arguments
- `--eval`: Specify the evaluation metric. For text detection, the metric should be either 'hmean-ic13' or 'hmean-iou'. For text recognition, the metric should be 'acc'.
<a id="markdown-train-a-model" name="train-a-model"></a>
## Train a Model
MMOCR implements **distributed** training with `MMDistributedDataParallel`. (Please refer to [datasets.md](datasets.md) to prepare your datasets)
All outputs (log files and checkpoints) will be saved to a working directory specified by `work_dir` in the config file.
By default, we evaluate the model on the validation set periodically during training. You can change the evaluation interval by adding the `interval` argument to the `evaluation` field of the training config as follows:
```python
evaluation = dict(interval=1, by_epoch=True) # This evaluates the model per epoch.
```
<a id="markdown-train-with-singlemultiple-gpus" name="train-with-singlemultiple-gpus"></a>
### Train with Single/Multiple GPUs
```shell
./tools/dist_train.sh ${CONFIG_FILE} ${WORK_DIR} ${GPU_NUM} [optional arguments]
```
Optional Arguments:
- `--no-validate` (**not suggested**): By default, the codebase will perform evaluation at every k-th iteration during training. To disable this behavior, use `--no-validate`.
<a id="markdown-train-with-toy-dataset" name="train-with-toy-dataset"></a>
#### Train with Toy Dataset
We provide a toy dataset under `tests/data`, so you can train a toy model directly before the academic datasets are prepared.
For example, to train a text recognition model with the `seg` method on the toy dataset:
```shell
./tools/dist_train.sh configs/textrecog/seg/seg_r31_1by16_fpnocr_toy_dataset.py work_dirs/seg 1
```
To train a text recognition model with the `sar` method on the toy dataset:
```shell
./tools/dist_train.sh configs/textrecog/sar/sar_r31_parallel_decoder_toy_dataset.py work_dirs/sar 1
```
<a id="markdown-train-with-slurm" name="train-with-slurm"></a>
### Train with Slurm
If you run MMOCR on a cluster managed with [Slurm](https://slurm.schedmd.com/), you can use the script `slurm_train.sh`.
```shell
[GPUS=${GPUS}] ./tools/slurm_train.sh ${PARTITION} ${JOB_NAME} ${CONFIG_FILE} ${WORK_DIR}
```
Here is an example of using 8 GPUs to train a text detection model on the dev partition.
```shell
GPUS=8 ./tools/slurm_train.sh dev psenet-ic15 configs/textdet/psenet/psenet_r50_fpnf_sbn_1x_icdar2015.py /nfs/xxxx/psenet-ic15
```
You can check [slurm_train.sh](https://github.com/open-mmlab/mmocr/blob/master/tools/slurm_train.sh) for full arguments and environment variables.
<a id="markdown-launch-multiple-jobs-on-a-single-machine" name="launch-multiple-jobs-on-a-single-machine"></a>
### Launch Multiple Jobs on a Single Machine
If you launch multiple jobs on a single machine, e.g., 2 jobs of 4-GPU training on a machine with 8 GPUs,
you need to specify different ports (29500 by default) for each job to avoid communication conflicts.
If you use `dist_train.sh` to launch training jobs, you can set the ports in the command shell.
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh ${CONFIG_FILE} ${WORK_DIR} 4
CUDA_VISIBLE_DEVICES=4,5,6,7 PORT=29501 ./tools/dist_train.sh ${CONFIG_FILE} ${WORK_DIR} 4
```
If you launch training jobs with Slurm, you need to modify the config files to set different communication ports.
In `config1.py`,
```python
dist_params = dict(backend='nccl', port=29500)
```
In `config2.py`,
```python
dist_params = dict(backend='nccl', port=29501)
```
Then you can launch two jobs with `config1.py` and `config2.py`.
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3 GPUS=4 ./tools/slurm_train.sh ${PARTITION} ${JOB_NAME} config1.py ${WORK_DIR}
CUDA_VISIBLE_DEVICES=4,5,6,7 GPUS=4 ./tools/slurm_train.sh ${PARTITION} ${JOB_NAME} config2.py ${WORK_DIR}
```
<a id="markdown-useful-tools" name="useful-tools"></a>
## Useful Tools
We provide numerous useful tools in the `tools/` directory.
<a id="markdown-publish-a-model" name="publish-a-model"></a>
### Publish a Model
Before you upload a model to AWS, you may want to
(1) convert the model weights to CPU tensors, (2) delete the optimizer states and
(3) compute the hash of the checkpoint file and append the hash id to the filename.
```shell
python tools/publish_model.py ${INPUT_FILENAME} ${OUTPUT_FILENAME}
```
E.g.,
```shell
python tools/publish_model.py work_dirs/psenet/latest.pth psenet_r50_fpnf_sbn_1x_20190801.pth
```
The final output filename will be `psenet_r50_fpnf_sbn_1x_20190801-{hash id}.pth`.
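For reference, here is a rough sketch of what such a publishing step typically involves. The `publish` helper below is hypothetical, not the exact content of `tools/publish_model.py`:
```python
# Hypothetical sketch of the publish flow described above, not the exact
# tools/publish_model.py implementation: (1) load weights onto CPU,
# (2) drop optimizer states, (3) hash the file and append the id.
import hashlib
import os

import torch

def publish(in_file: str, out_file: str) -> str:
    ckpt = torch.load(in_file, map_location='cpu')  # (1) CPU tensors
    ckpt.pop('optimizer', None)                     # (2) remove optimizer states
    torch.save(ckpt, out_file)
    with open(out_file, 'rb') as f:                 # (3) hash and rename
        sha = hashlib.sha256(f.read()).hexdigest()[:8]
    final_file = out_file.replace('.pth', f'-{sha}.pth')
    os.rename(out_file, final_file)
    return final_file
```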
<a id="markdown-customized-settings" name="customized-settings"></a>
## Customized Settings
<a id="markdown-flexible-dataset" name="flexible-dataset"></a>
### Flexible Dataset
To support the tasks of `text detection`, `text recognition` and `key information extraction`, we have designed a new type of dataset that consists of a `loader` and a `parser` to load and parse different types of annotation files.
- **loader**: Loads the annotation file. There are two types of loaders, `HardDiskLoader` and `LmdbLoader` (see the sketch after this list).
  - `HardDiskLoader`: Loads a `txt`-format annotation file from hard disk into memory.
  - `LmdbLoader`: Loads an `lmdb`-format annotation file with the lmdb backend. This is very useful for **extremely large** annotation files: since each GPU starts multiple worker processes, loading the whole annotation file into the memory of every process would otherwise cause out-of-memory problems when ten or more GPUs are used.
- **parser**: Parses the annotation file line by line and returns a `dict`. There are two types of parsers, `LineStrParser` and `LineJsonParser`.
  - `LineStrParser`: Parses one line of the annotation file, treating it as a string and splitting it into several parts by a `separator`. It can be used for tasks with simple annotation files, such as text recognition, where each line contains only the `filename` and `label` attributes.
  - `LineJsonParser`: Parses one line of the annotation file, treating it as a JSON string and converting it to a `dict` with `json.loads`. It can be used for tasks with complex annotation files, such as text detection, where each line contains multiple attributes (e.g. `filename`, `height`, `width`, `box`, `segmentation`, `iscrowd`, `category_id`, etc.).
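Conceptually, the loader side is simple. The following is a rough, illustrative stand-in for what a `HardDiskLoader`-style loader does; `load_annotations` is a hypothetical helper, not the actual class:
```python
# Illustrative stand-in only, not the actual HardDiskLoader implementation:
# read all annotation lines into memory and repeat them `repeat` times.
def load_annotations(ann_file, repeat=1):
    with open(ann_file, 'r') as f:
        lines = [line.strip() for line in f if line.strip()]
    return lines * repeat
```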
Below are some examples of using different combinations of `loader` and `parser`.
<a id="markdown-encoder-decoder-based-text-recognition-task" name="encoder-decoder-based-text-recognition-task"></a>
#### Encoder-Decoder-Based Text Recognition Task
```python
dataset_type = 'OCRDataset'
img_prefix = 'tests/data/ocr_toy_dataset/imgs'
train_anno_file = 'tests/data/ocr_toy_dataset/label.txt'
train = dict(
type=dataset_type,
img_prefix=img_prefix,
ann_file=train_anno_file,
loader=dict(
type='HardDiskLoader',
repeat=10,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=train_pipeline,
test_mode=False)
```
You can check the content of the annotation file in `tests/data/ocr_toy_dataset/label.txt`.
The combination of `HardDiskLoader` and `LineStrParser` will return a dict for each file by calling `__getitem__`: `{'filename': '1223731.jpg', 'text': 'GRAND'}`.
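To make this concrete, here is a simplified stand-in (not the actual class) for what `LineStrParser` does to a single toy-dataset line, using the `keys`, `keys_idx` and `separator` values from the config above; `parse_line` is a hypothetical helper:
```python
# Simplified stand-in for LineStrParser on one toy annotation line.
def parse_line(line, keys=('filename', 'text'), keys_idx=(0, 1), separator=' '):
    parts = line.strip().split(separator)
    return {key: parts[idx] for key, idx in zip(keys, keys_idx)}

print(parse_line('1223731.jpg GRAND'))
# {'filename': '1223731.jpg', 'text': 'GRAND'}
```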
<a id="markdown-optional-arguments" name="optional-arguments"></a>
##### Optional Arguments:
- `repeat`: The number of times to repeat the lines of the annotation file. For example, if there are `10` lines in the annotation file, setting `repeat=10` makes the dataset behave as if it contained `100` samples.
If the annotation file is extremely large, you can convert it from txt format to lmdb format with the following command:
```shell
python tools/data_converter/txt2lmdb.py -i ann_file.txt -o ann_file.lmdb
```
After that, you can use `LmdbLoader` in the dataset config as below.
```python
img_prefix = 'tests/data/ocr_toy_dataset/imgs'
train_anno_file = 'tests/data/ocr_toy_dataset/label.lmdb'
train = dict(
type=dataset_type,
img_prefix=img_prefix,
ann_file=train_anno_file,
loader=dict(
type='LmdbLoader',
repeat=10,
parser=dict(
type='LineStrParser',
keys=['filename', 'text'],
keys_idx=[0, 1],
separator=' ')),
pipeline=train_pipeline,
test_mode=False)
```
<a id="markdown-segmentation-based-text-recognition-task" name="segmentation-based-text-recognition-task"></a>
#### Segmentation-Based Text Recognition Task
```python
prefix = 'tests/data/ocr_char_ann_toy_dataset/'
train = dict(
type='OCRSegDataset',
img_prefix=prefix + 'imgs',
ann_file=prefix + 'instances_train.txt',
loader=dict(
type='HardDiskLoader',
repeat=10,
parser=dict(
type='LineJsonParser',
keys=['file_name', 'annotations', 'text'])),
pipeline=train_pipeline,
test_mode=True)
```
You can check the content of the annotation file in `tests/data/ocr_char_ann_toy_dataset/instances_train.txt`.
The combination of `HardDiskLoader` and `LineJsonParser` will return a dict for each file by calling `__getitem__` each time:
```python
{"file_name": "resort_88_101_1.png", "annotations": [{"char_text": "F", "char_box": [11.0, 0.0, 22.0, 0.0, 12.0, 12.0, 0.0, 12.0]}, {"char_text": "r", "char_box": [23.0, 2.0, 31.0, 1.0, 24.0, 11.0, 16.0, 11.0]}, {"char_text": "o", "char_box": [33.0, 2.0, 43.0, 2.0, 36.0, 12.0, 25.0, 12.0]}, {"char_text": "m", "char_box": [46.0, 2.0, 61.0, 2.0, 53.0, 12.0, 39.0, 12.0]}, {"char_text": ":", "char_box": [61.0, 2.0, 69.0, 2.0, 63.0, 12.0, 55.0, 12.0]}], "text": "From:"}
```
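Analogously, a simplified stand-in (not the actual class) for what `LineJsonParser` does per line; `parse_json_line` is a hypothetical helper:
```python
# Simplified stand-in for LineJsonParser on one annotation line.
import json

def parse_json_line(line, keys=('file_name', 'annotations', 'text')):
    obj = json.loads(line)
    return {key: obj[key] for key in keys}

line = '{"file_name": "resort_88_101_1.png", "annotations": [], "text": "From:"}'
print(parse_json_line(line)['text'])  # From:
```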
<a id="markdown-text-detection-task" name="text-detection-task"></a>
#### Text Detection Task
```python
dataset_type = 'TextDetDataset'
img_prefix = 'tests/data/toy_dataset/imgs'
test_anno_file = 'tests/data/toy_dataset/instances_test.txt'
test = dict(
type=dataset_type,
img_prefix=img_prefix,
ann_file=test_anno_file,
loader=dict(
type='HardDiskLoader',
repeat=4,
parser=dict(
type='LineJsonParser',
keys=['file_name', 'height', 'width', 'annotations'])),
pipeline=test_pipeline,
test_mode=True)
```
The results are generated in the same way as the segmentation-based text recognition task above.
You can check the content of the annotation file in `tests/data/toy_dataset/instances_test.txt`.
The combination of `HardDiskLoader` and `LineJsonParser` will return a dict for each file by calling `__getitem__`:
```python
{"file_name": "test/img_10.jpg", "height": 720, "width": 1280, "annotations": [{"iscrowd": 1, "category_id": 1, "bbox": [260.0, 138.0, 24.0, 20.0], "segmentation": [[261, 138, 284, 140, 279, 158, 260, 158]]}, {"iscrowd": 0, "category_id": 1, "bbox": [288.0, 138.0, 129.0, 23.0], "segmentation": [[288, 138, 417, 140, 416, 161, 290, 157]]}, {"iscrowd": 0, "category_id": 1, "bbox": [743.0, 145.0, 37.0, 18.0], "segmentation": [[743, 145, 779, 146, 780, 163, 746, 163]]}, {"iscrowd": 0, "category_id": 1, "bbox": [783.0, 129.0, 50.0, 26.0], "segmentation": [[783, 129, 831, 132, 833, 155, 785, 153]]}, {"iscrowd": 1, "category_id": 1, "bbox": [831.0, 133.0, 43.0, 23.0], "segmentation": [[831, 133, 870, 135, 874, 156, 835, 155]]}, {"iscrowd": 1, "category_id": 1, "bbox": [159.0, 204.0, 72.0, 15.0], "segmentation": [[159, 205, 230, 204, 231, 218, 159, 219]]}, {"iscrowd": 1, "category_id": 1, "bbox": [785.0, 158.0, 75.0, 21.0], "segmentation": [[785, 158, 856, 158, 860, 178, 787, 179]]}, {"iscrowd": 1, "category_id": 1, "bbox": [1011.0, 157.0, 68.0, 16.0], "segmentation": [[1011, 157, 1079, 160, 1076, 173, 1011, 170]]}]}
```
<a id="markdown-coco-like-dataset" name="coco-like-dataset"></a>
### COCO-like Dataset
For text detection, you can also use an annotation file in the COCO format defined in [mmdet](https://github.com/open-mmlab/mmdetection/blob/master/mmdet/datasets/coco.py):
```python
dataset_type = 'IcdarDataset'
prefix = 'tests/data/toy_dataset/'
test=dict(
type=dataset_type,
ann_file=prefix + 'instances_test.json',
img_prefix=prefix + 'imgs',
pipeline=test_pipeline)
```
You can check the content of the annotation file in `tests/data/toy_dataset/instances_test.json`.
- The icdar2015/2017 annotations have to be converted into the COCO format using `tools/data_converter/icdar_converter.py`:
```shell
python tools/data_converter/icdar_converter.py ${src_root_path} -o ${out_path} -d ${data_type} --split-list training validation test
```
- The ctw1500 annotations have to be converted into the COCO format using `tools/data_converter/ctw1500_converter.py`:
```shell
python tools/data_converter/ctw1500_converter.py ${src_root_path} -o ${out_path} --split-list training test
```
38
docs/index.rst 100644
@ -0,0 +1,38 @@
Welcome to MMOCR's documentation!
=======================================
.. toctree::
:maxdepth: 2
:caption: Get Started
install.md
getting_started.md
technical_details.md
contributing.md
.. toctree::
:maxdepth: 2
:caption: Model Zoo
modelzoo.md
textdet_models.md
textrecog_models.md
kie_models.md
.. toctree::
:maxdepth: 2
:caption: Notes
changelog.md
faq.md
.. toctree::
:caption: API Reference
api.rst
Indices and tables
==================
* :ref:`genindex`
* :ref:`search`
249
docs/install.md 100644
@ -0,0 +1,249 @@
<a id="markdown-installation" name="installation"></a>
# Installation
<!-- TOC -->
- [Installation](#installation)
- [Prerequisites](#prerequisites)
- [Step-by-Step Installation Instructions](#step-by-step-installation-instructions)
- [Full Set-up Script](#full-set-up-script)
- [Another option: Docker Image](#another-option-docker-image)
- [Prepare Datasets](#prepare-datasets)
<!-- /TOC -->
<a id="markdown-prerequisites" name="prerequisites"></a>
## Prerequisites
- Linux (Windows is not officially supported)
- Python 3.7
- PyTorch 1.5 or higher
- torchvision 0.6.0
- CUDA 10.1
- NCCL 2
- GCC 5.4.0 or higher
- [mmcv](https://github.com/open-mmlab/mmcv) 1.2.6
We have tested the following versions of OS and software:
- OS: Ubuntu 16.04
- CUDA: 10.1
- GCC(G++): 5.4.0
- mmcv 1.2.6
- PyTorch 1.5
- torchvision 0.6.0
MMOCR depends on PyTorch and mmdetection v2.9.0.
<a id="markdown-step-by-step-installation-instructions" name="step-by-step-installation-instructions"></a>
## Step-by-Step Installation Instructions
a. Create a conda virtual environment and activate it.
```shell
conda create -n open-mmlab python=3.7 -y
conda activate open-mmlab
```
b. Install PyTorch and torchvision following the [official instructions](https://pytorch.org/), e.g.,
```shell
conda install pytorch==1.5.0 torchvision==0.6.0 cudatoolkit=10.1 -c pytorch
```
Note: Make sure that your compilation CUDA version and runtime CUDA version match.
You can check the supported CUDA version for precompiled packages on the [PyTorch website](https://pytorch.org/).
`E.g. 1` If you have CUDA 10.1 installed under `/usr/local/cuda` and would like to install
PyTorch 1.5, you need to install the prebuilt PyTorch with CUDA 10.1.
```shell
conda install pytorch cudatoolkit=10.1 torchvision -c pytorch
```
`E.g. 2` If you have CUDA 9.2 installed under `/usr/local/cuda` and would like to install
PyTorch 1.3.1, you need to install the prebuilt PyTorch with CUDA 9.2.
```shell
conda install pytorch=1.3.1 cudatoolkit=9.2 torchvision=0.4.2 -c pytorch
```
If you build PyTorch from source instead of installing the prebuilt package,
you can use more CUDA versions such as 9.0.
c. Create a folder called `code` and clone the mmcv repository into it.
```shell
mkdir code
cd code
git clone https://github.com/open-mmlab/mmcv.git
cd mmcv
git checkout -b v1.2.6 v1.2.6
pip install -r requirements.txt
MMCV_WITH_OPS=1 pip install -v -e .
```
d. Clone the mmdetection repository into `code`. The mmdetection repo is separate from the mmcv repo in `code`.
```shell
cd ..
git clone https://github.com/open-mmlab/mmdetection.git
cd mmdetection
git checkout -b v2.9.0 v2.9.0
pip install -r requirements.txt
pip install -v -e .
export PYTHONPATH=$(pwd):$PYTHONPATH
```
Note that we have tested mmdetection v2.9.0 only. Other versions might be incompatible.
e. Clone the mmocr repository into `code`. The mmocr repo is separate from the mmcv and mmdetection repos in `code`.
```shell
cd ..
git clone https://github.com/open-mmlab/mmocr.git
cd mmocr
```
f. Install build requirements and then install MMOCR.
```shell
pip install -r requirements.txt
pip install -v -e . # or "python setup.py build_ext --inplace"
export PYTHONPATH=$(pwd):$PYTHONPATH
```
<a id="markdown-full-set-up-script" name="full-set-up-script"></a>
## Full Set-up Script
Here is the full script for setting up MMOCR with conda.
```shell
conda create -n open-mmlab python=3.7 -y
conda activate open-mmlab
# install PyTorch 1.5 prebuilt with CUDA 10.1
conda install pytorch==1.5.0 torchvision==0.6.0 cudatoolkit=10.1 -c pytorch
# install mmcv
mkdir code
cd code
git clone https://github.com/open-mmlab/mmcv.git
cd mmcv # code/mmcv
git checkout -b v1.2.6 v1.2.6
pip install -r requirements.txt
MMCV_WITH_OPS=1 pip install -v -e .
# install mmdetection
cd .. # exit to code
git clone https://github.com/open-mmlab/mmdetection.git
cd mmdetection # code/mmdetection
git checkout -b v2.9.0 v2.9.0
pip install -r requirements.txt
pip install -v -e .
export PYTHONPATH=$(pwd):$PYTHONPATH
# install mmocr
cd ..
git clone https://github.com/open-mmlab/mmocr.git
cd mmocr # code/mmocr
pip install -r requirements.txt
pip install -v -e . # or "python setup.py build_ext --inplace"
export PYTHONPATH=$(pwd):$PYTHONPATH
```
<a id="markdown-another-option-docker-image" name="another-option-docker-image"></a>
## Another option: Docker Image
We provide a [Dockerfile](https://github.com/open-mmlab/mmocr/blob/master/docker/Dockerfile) to build an image.
```shell
# build an image with PyTorch 1.5, CUDA 10.1
docker build -t mmocr docker/
```
Run it with
```shell
docker run --gpus all --shm-size=8g -it -v {DATA_DIR}:/mmocr/data mmocr
```
<a id="markdown-prepare-datasets" name="prepare-datasets"></a>
## Prepare Datasets
It is recommended to symlink the dataset root to `mmocr/data`. Please refer to [datasets.md](datasets.md) to prepare your datasets.
If your folder structure is different, you may need to change the corresponding paths in config files.
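For instance, configs typically reference the data layout through path variables, as in the following hypothetical excerpt (the names mirror the COCO-like dataset example in getting_started.md); these are the paths to adjust:
```python
# Hypothetical config excerpt: where dataset paths typically live.
# Change data_root if your data is not under mmocr/data.
dataset_type = 'IcdarDataset'
data_root = 'data/icdar2015/'

data = dict(
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'instances_test.json',
        img_prefix=data_root + 'imgs'))
```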
The `mmocr` folder is organized as follows:
```
mmocr
├── configs
│   ├── _base_
│   ├── kie
│   ├── textdet
│   └── textrecog
├── demo
│   ├── demo_text_det.jpg
│   ├── demo_text_recog.jpg
│   ├── image_demo.py
│   └── webcam_demo.py
├── docs
│   ├── api.rst
│   ├── changelog.md
│   ├── code_of_conduct.md
│   ├── conf.py
│   ├── contributing.md
│   ├── datasets.md
│   ├── getting_started.md
│   ├── index.rst
│   ├── install.md
│   ├── make.bat
│   ├── Makefile
│   ├── merge_docs.sh
│   ├── requirements.txt
│   ├── res
│   ├── stats.py
│   └── technical_details.md
├── LICENSE
├── mmocr
│   ├── apis
│   ├── core
│   ├── datasets
│   ├── __init__.py
│   ├── models
│   ├── utils
│   └── version.py
├── README.md
├── requirements
│   ├── build.txt
│   ├── docs.txt
│   ├── optional.txt
│   ├── readthedocs.txt
│   ├── runtime.txt
│   └── tests.txt
├── requirements.txt
├── resources
│   ├── illustration.jpg
│   └── mmocr-logo.png
├── setup.cfg
├── setup.py
├── tests
│   ├── data
│   ├── test_dataset
│   ├── test_metrics
│   ├── test_models
│   ├── test_tools
│   └── test_utils
└── tools
├── data
├── dist_test.sh
├── dist_train.sh
├── ocr_test_imgs.py
├── ocr_test_imgs.sh
├── publish_model.py
├── slurm_test.sh
├── slurm_train.sh
├── test_imgs.py
├── test_imgs.sh
├── test.py
└── train.py
```
36
docs/make.bat 100644
@ -0,0 +1,36 @@
@ECHO OFF
pushd %~dp0
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build
if "%1" == "" goto help
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.http://sphinx-doc.org/
exit /b 1
)
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd
10
docs/merge_docs.sh 100755
@ -0,0 +1,10 @@
#!/usr/bin/env bash
sed -i '$a\\n' ../configs/kie/*/*.md
sed -i '$a\\n' ../configs/textdet/*/*.md
sed -i '$a\\n' ../configs/textrecog/*/*.md
# gather models
cat ../configs/kie/*/*.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1i\# Kie Models' | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmocr/tree/master/=g' >kie_models.md
cat ../configs/textdet/*/*.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1i\# Text Detection Models' | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmocr/tree/master/=g' >textdet_models.md
cat ../configs/textrecog/*/*.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1i\# Text Recognition Models' | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmocr/tree/master/=g' >textrecog_models.md
@ -0,0 +1,4 @@
recommonmark
sphinx
sphinx_markdown_tables
sphinx_rtd_theme
Binary file not shown (24 KiB).
Binary file not shown (17 KiB).
94
docs/stats.py 100755
@ -0,0 +1,94 @@
#!/usr/bin/env python
import functools as func
import glob
import re
from os.path import basename, splitext
import numpy as np
import titlecase
def anchor(name):
return re.sub(r'-+', '-', re.sub(r'[^a-zA-Z0-9]', '-',
name.strip().lower())).strip('-')
# Count algorithms
files = sorted(glob.glob('*_models.md'))
# files = sorted(glob.glob('docs/*_models.md'))
stats = []
for f in files:
with open(f, 'r') as content_file:
content = content_file.read()
# title
title = content.split('\n')[0].replace('#', '')
# count papers
papers = set((papertype, titlecase.titlecase(paper.lower().strip()))
for (papertype, paper) in re.findall(
r'\n\s*\[([A-Z]+?)\]\s*\n.*?\btitle\s*=\s*{(.*?)}',
content, re.DOTALL))
# paper links
revcontent = '\n'.join(list(reversed(content.splitlines())))
paperlinks = {}
for _, p in papers:
print(p)
q = p.replace('\\', '\\\\').replace('?', '\\?')
paperlinks[p] = ' '.join(
(f'[⇨]({splitext(basename(f))[0]}.html#{anchor(paperlink)})'
for paperlink in re.findall(
rf'\btitle\s*=\s*{{\s*{q}\s*}}.*?\n## (.*?)\s*[,;]?\s*\n',
revcontent, re.DOTALL | re.IGNORECASE)))
print(' ', paperlinks[p])
paperlist = '\n'.join(
sorted(f' - [{t}] {x} ({paperlinks[x]})' for t, x in papers))
# count configs
configs = set(x.lower().strip()
for x in re.findall(r'https.*configs/.*\.py', content))
# count ckpts
ckpts = set(x.lower().strip()
for x in re.findall(r'https://download.*\.pth', content)
if 'mmocr' in x)
statsmsg = f"""
## [{title}]({f})
* Number of checkpoints: {len(ckpts)}
* Number of configs: {len(configs)}
* Number of papers: {len(papers)}
{paperlist}
"""
stats.append((papers, configs, ckpts, statsmsg))
allpapers = func.reduce(lambda a, b: a.union(b), [p for p, _, _, _ in stats])
allconfigs = func.reduce(lambda a, b: a.union(b), [c for _, c, _, _ in stats])
allckpts = func.reduce(lambda a, b: a.union(b), [c for _, _, c, _ in stats])
msglist = '\n'.join(x for _, _, _, x in stats)
papertypes, papercounts = np.unique([t for t, _ in allpapers],
return_counts=True)
countstr = '\n'.join(
[f' - {t}: {c}' for t, c in zip(papertypes, papercounts)])
modelzoo = f"""
# Overview
* Number of checkpoints: {len(allckpts)}
* Number of configs: {len(allconfigs)}
* Number of papers: {len(allpapers)}
{countstr}
For supported datasets, see [datasets overview](datasets.md).
{msglist}
"""
with open('modelzoo.md', 'w') as f:
f.write(modelzoo)
3
mmocr/__init__.py 100644
@ -0,0 +1,3 @@
from .version import __version__, short_version
__all__ = ['__version__', 'short_version']
3
mmocr/apis/__init__.py 100644
@ -0,0 +1,3 @@
from .inference import model_inference
__all__ = ['model_inference']
43
mmocr/apis/inference.py 100644
@ -0,0 +1,43 @@
import torch
from mmcv.ops import RoIPool
from mmcv.parallel import collate, scatter
from mmdet.datasets.pipelines import Compose
def model_inference(model, img):
    """Inference an image with the detector.

    Args:
        model (nn.Module): The loaded detector.
        img (str): Path to the image file.
    Returns:
        result (dict): Detection results.
    """
    assert isinstance(img, str)

    cfg = model.cfg
    device = next(model.parameters()).device  # model device
    data = dict(img_info=dict(filename=img), img_prefix=None)
    # build the data pipeline
    test_pipeline = Compose(cfg.data.test.pipeline)
    data = test_pipeline(data)
    data = collate([data], samples_per_gpu=1)
    # process img_metas
    data['img_metas'] = data['img_metas'][0].data

    if next(model.parameters()).is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device])[0]
    else:
        for m in model.modules():
            assert not isinstance(
                m, RoIPool
            ), 'CPU inference with RoIPool is not supported currently.'

    # forward the model
    with torch.no_grad():
        result = model(return_loss=False, rescale=True, **data)[0]
    return result
3
mmocr/core/__init__.py 100644
@ -0,0 +1,3 @@
from .evaluation import * # noqa: F401, F403
from .mask import * # noqa: F401, F403
from .visualize import * # noqa: F401, F403
10
mmocr/core/evaluation/__init__.py 100644
@ -0,0 +1,10 @@
from .hmean import eval_hmean
from .hmean_ic13 import eval_hmean_ic13
from .hmean_iou import eval_hmean_iou
from .kie_metric import compute_f1_score
from .ocr_metric import eval_ocr_metric
__all__ = [
'eval_hmean_ic13', 'eval_hmean_iou', 'eval_ocr_metric', 'eval_hmean',
'compute_f1_score'
]
149
mmocr/core/evaluation/hmean.py 100644
@ -0,0 +1,149 @@
from operator import itemgetter
import mmcv
from mmcv.utils import print_log
import mmocr.utils as utils
from mmocr.core.evaluation import hmean_ic13, hmean_iou
from mmocr.core.evaluation.utils import (filter_2dlist_result,
select_top_boundary)
from mmocr.core.mask import extract_boundary
def output_ranklist(img_results, img_infos, out_file):
"""Output the worst results for debugging.
Args:
img_results (list[dict]): Image result list.
img_infos (list[dict]): Image information list.
out_file (str): The output file path.
Returns:
sorted_results (list[dict]): Image results sorted by hmean.
"""
assert utils.is_type_list(img_results, dict)
assert utils.is_type_list(img_infos, dict)
assert isinstance(out_file, str)
assert out_file.endswith('json')
sorted_results = []
for inx, result in enumerate(img_results):
name = img_infos[inx]['file_name']
img_result = result
img_result['file_name'] = name
sorted_results.append(img_result)
sorted_results = sorted(
sorted_results, key=itemgetter('hmean'), reverse=False)
mmcv.dump(sorted_results, file=out_file)
return sorted_results
def get_gt_masks(ann_infos):
"""Get ground truth masks and ignored masks.
Args:
ann_infos (list[dict]): Each dict contains annotation
infos of one image, containing following keys:
masks, masks_ignore.
Returns:
gt_masks (list[list[list[int]]]): Ground truth masks.
gt_masks_ignore (list[list[list[int]]]): Ignored masks.
"""
assert utils.is_type_list(ann_infos, dict)
gt_masks = []
gt_masks_ignore = []
for ann_info in ann_infos:
masks = ann_info['masks']
mask_gt = []
for mask in masks:
assert len(mask[0]) >= 8 and len(mask[0]) % 2 == 0
mask_gt.append(mask[0])
gt_masks.append(mask_gt)
masks_ignore = ann_info['masks_ignore']
mask_gt_ignore = []
for mask_ignore in masks_ignore:
assert len(mask_ignore[0]) >= 8 and len(mask_ignore[0]) % 2 == 0
mask_gt_ignore.append(mask_ignore[0])
gt_masks_ignore.append(mask_gt_ignore)
return gt_masks, gt_masks_ignore
def eval_hmean(results,
img_infos,
ann_infos,
metrics={'hmean-iou'},
score_thr=0.3,
rank_list=None,
logger=None,
**kwargs):
"""Evaluation in hmean metric.
Args:
results (list[dict]): Each dict corresponds to one image,
containing the following keys: boundary_result
img_infos (list[dict]): Each dict corresponds to one image,
containing the following keys: filename, height, width
ann_infos (list[dict]): Each dict corresponds to one image,
containing the following keys: masks, masks_ignore
score_thr (float): Score threshold of prediction map.
metrics (set{str}): Hmean metric set, should be one or all of
{'hmean-iou', 'hmean-ic13'}
Returns:
dict[str: float]
"""
assert utils.is_type_list(results, dict)
assert utils.is_type_list(img_infos, dict)
assert utils.is_type_list(ann_infos, dict)
assert len(results) == len(img_infos) == len(ann_infos)
assert isinstance(metrics, set)
gts, gts_ignore = get_gt_masks(ann_infos)
preds = []
pred_scores = []
for result in results:
_, texts, scores = extract_boundary(result)
if len(texts) > 0:
assert utils.valid_boundary(texts[0], False)
valid_texts, valid_text_scores = filter_2dlist_result(
texts, scores, score_thr)
preds.append(valid_texts)
pred_scores.append(valid_text_scores)
eval_results = {}
for metric in metrics:
msg = f'Evaluating {metric}...'
if logger is None:
msg = '\n' + msg
print_log(msg, logger=logger)
best_result = dict(hmean=-1)
for iter in range(3, 10):
thr = iter * 0.1
top_preds = select_top_boundary(preds, pred_scores, thr)
if metric == 'hmean-iou':
result, img_result = hmean_iou.eval_hmean_iou(
top_preds, gts, gts_ignore)
elif metric == 'hmean-ic13':
result, img_result = hmean_ic13.eval_hmean_ic13(
top_preds, gts, gts_ignore)
else:
raise NotImplementedError
if rank_list is not None:
output_ranklist(img_result, img_infos, rank_list)
print_log(
'thr {0:.1f}, recall{1[recall]:.3f}, '
'precision: {1[precision]:.3f}, '
'hmean:{1[hmean]:.3f}'.format(thr, result),
logger=logger)
if result['hmean'] > best_result['hmean']:
best_result = result
eval_results[metric + ':recall'] = best_result['recall']
eval_results[metric + ':precision'] = best_result['precision']
eval_results[metric + ':hmean'] = best_result['hmean']
return eval_results
216
mmocr/core/evaluation/hmean_ic13.py 100644
@ -0,0 +1,216 @@
import numpy as np
import mmocr.utils as utils
from . import utils as eval_utils
def compute_recall_precision(gt_polys, pred_polys):
"""Compute the recall and the precision matrices between gt and predicted
polygons.
Args:
gt_polys (list[Polygon]): List of gt polygons.
pred_polys (list[Polygon]): List of predicted polygons.
Returns:
recall (ndarray): Recall matrix of size gt_num x det_num.
precision (ndarray): Precision matrix of size gt_num x det_num.
"""
assert isinstance(gt_polys, list)
assert isinstance(pred_polys, list)
gt_num = len(gt_polys)
det_num = len(pred_polys)
sz = [gt_num, det_num]
recall = np.zeros(sz)
precision = np.zeros(sz)
# compute area recall and precision for each (gt, det) pair
# in one img
for gt_id in range(gt_num):
for pred_id in range(det_num):
gt = gt_polys[gt_id]
det = pred_polys[pred_id]
inter_area, _ = eval_utils.poly_intersection(det, gt)
gt_area = gt.area()
det_area = det.area()
if gt_area != 0:
recall[gt_id, pred_id] = inter_area / gt_area
if det_area != 0:
precision[gt_id, pred_id] = inter_area / det_area
return recall, precision
def eval_hmean_ic13(det_boxes,
gt_boxes,
gt_ignored_boxes,
precision_thr=0.4,
recall_thr=0.8,
center_dist_thr=1.0,
one2one_score=1.,
one2many_score=0.8,
many2one_score=1.):
"""Evalute hmean of text detection using the icdar2013 standard.
Args:
det_boxes (list[list[list[float]]]): List of arrays of shape (n, 2k).
Each element is the det_boxes for one img. k>=4.
gt_boxes (list[list[list[float]]]): List of arrays of shape (m, 2k).
Each element is the gt_boxes for one img. k>=4.
gt_ignored_boxes (list[list[list[float]]]): List of arrays of
(l, 2k). Each element is the ignored gt_boxes for one img. k>=4.
precision_thr (float): Precision threshold of the iou of one
(gt_box, det_box) pair.
recall_thr (float): Recall threshold of the iou of one
(gt_box, det_box) pair.
center_dist_thr (float): Distance threshold of one (gt_box, det_box)
center point pair.
one2one_score (float): Reward when one gt matches one det_box.
one2many_score (float): Reward when one gt matches many det_boxes.
many2one_score (float): Reward when many gts match one det_box.
Returns:
hmean (tuple[dict]): Tuple of dicts which encodes the hmean for
the dataset and all images.
"""
assert utils.is_3dlist(det_boxes)
assert utils.is_3dlist(gt_boxes)
assert utils.is_3dlist(gt_ignored_boxes)
assert 0 <= precision_thr <= 1
assert 0 <= recall_thr <= 1
assert center_dist_thr > 0
assert 0 <= one2one_score <= 1
assert 0 <= one2many_score <= 1
assert 0 <= many2one_score <= 1
img_num = len(det_boxes)
assert img_num == len(gt_boxes)
assert img_num == len(gt_ignored_boxes)
dataset_gt_num = 0
dataset_pred_num = 0
dataset_hit_recall = 0.0
dataset_hit_prec = 0.0
img_results = []
for i in range(img_num):
gt = gt_boxes[i]
gt_ignored = gt_ignored_boxes[i]
pred = det_boxes[i]
gt_num = len(gt)
ignored_num = len(gt_ignored)
pred_num = len(pred)
accum_recall = 0.
accum_precision = 0.
gt_points = gt + gt_ignored
gt_polys = [eval_utils.points2polygon(p) for p in gt_points]
gt_ignored_index = [gt_num + i for i in range(len(gt_ignored))]
gt_num = len(gt_polys)
pred_polys, pred_points, pred_ignored_index = eval_utils.ignore_pred(
pred, gt_ignored_index, gt_polys, precision_thr)
if pred_num > 0 and gt_num > 0:
gt_hit = np.zeros(gt_num, np.int8).tolist()
pred_hit = np.zeros(pred_num, np.int8).tolist()
# compute area recall and precision for each (gt, pred) pair
# in one img.
recall_mat, precision_mat = compute_recall_precision(
gt_polys, pred_polys)
# match one gt to one pred box.
for gt_id in range(gt_num):
for pred_id in range(pred_num):
if (gt_hit[gt_id] != 0 or pred_hit[pred_id] != 0
or gt_id in gt_ignored_index
or pred_id in pred_ignored_index):
continue
match = eval_utils.one2one_match_ic13(
gt_id, pred_id, recall_mat, precision_mat, recall_thr,
precision_thr)
if match:
gt_point = np.array(gt_points[gt_id])
det_point = np.array(pred_points[pred_id])
norm_dist = eval_utils.box_center_distance(
det_point, gt_point)
norm_dist /= eval_utils.box_diag(
det_point) + eval_utils.box_diag(gt_point)
norm_dist *= 2.0
if norm_dist < center_dist_thr:
gt_hit[gt_id] = 1
pred_hit[pred_id] = 1
accum_recall += one2one_score
accum_precision += one2one_score
# match one gt to many det boxes.
for gt_id in range(gt_num):
if gt_id in gt_ignored_index:
continue
match, match_det_set = eval_utils.one2many_match_ic13(
gt_id, recall_mat, precision_mat, recall_thr,
precision_thr, gt_hit, pred_hit, pred_ignored_index)
if match:
gt_hit[gt_id] = 1
accum_recall += one2many_score
accum_precision += one2many_score * len(match_det_set)
for pred_id in match_det_set:
pred_hit[pred_id] = 1
# match many gt to one det box. One pair of (det,gt) are matched
# successfully if their recall, precision, normalized distance
# meet some thresholds.
for pred_id in range(pred_num):
if pred_id in pred_ignored_index:
continue
match, match_gt_set = eval_utils.many2one_match_ic13(
pred_id, recall_mat, precision_mat, recall_thr,
precision_thr, gt_hit, pred_hit, gt_ignored_index)
if match:
pred_hit[pred_id] = 1
accum_recall += many2one_score * len(match_gt_set)
accum_precision += many2one_score
for gt_id in match_gt_set:
gt_hit[gt_id] = 1
gt_care_number = gt_num - ignored_num
pred_care_number = pred_num - len(pred_ignored_index)
r, p, h = eval_utils.compute_hmean(accum_recall, accum_precision,
gt_care_number, pred_care_number)
img_results.append({'recall': r, 'precision': p, 'hmean': h})
dataset_gt_num += gt_care_number
dataset_pred_num += pred_care_number
dataset_hit_recall += accum_recall
dataset_hit_prec += accum_precision
total_r, total_p, total_h = eval_utils.compute_hmean(
dataset_hit_recall, dataset_hit_prec, dataset_gt_num, dataset_pred_num)
dataset_results = {
'num_gts': dataset_gt_num,
'num_dets': dataset_pred_num,
'num_recall': dataset_hit_recall,
'num_precision': dataset_hit_prec,
'recall': total_r,
'precision': total_p,
'hmean': total_h
}
return dataset_results, img_results
116
mmocr/core/evaluation/hmean_iou.py 100644
@ -0,0 +1,116 @@
import numpy as np
import mmocr.utils as utils
from . import utils as eval_utils
def eval_hmean_iou(pred_boxes,
gt_boxes,
gt_ignored_boxes,
iou_thr=0.5,
precision_thr=0.5):
"""Evalute hmean of text detection using IOU standard.
Args:
pred_boxes (list[list[list[float]]]): Text boxes for an img list. Each
box has 2k (>=8) values.
gt_boxes (list[list[list[float]]]): Ground truth text boxes for an img
list. Each box has 2k (>=8) values.
gt_ignored_boxes (list[list[list[float]]]): Ignored ground truth text
boxes for an img list. Each box has 2k (>=8) values.
iou_thr (float): Iou threshold when one (gt_box, det_box) pair is
matched.
precision_thr (float): Precision threshold when one (gt_box, det_box)
pair is matched.
Returns:
hmean (tuple[dict]): Tuple of dicts indicates the hmean for the dataset
and all images.
"""
assert utils.is_3dlist(pred_boxes)
assert utils.is_3dlist(gt_boxes)
assert utils.is_3dlist(gt_ignored_boxes)
assert 0 <= iou_thr <= 1
assert 0 <= precision_thr <= 1
img_num = len(pred_boxes)
assert img_num == len(gt_boxes)
assert img_num == len(gt_ignored_boxes)
dataset_gt_num = 0
dataset_pred_num = 0
dataset_hit_num = 0
img_results = []
for i in range(img_num):
gt = gt_boxes[i]
gt_ignored = gt_ignored_boxes[i]
pred = pred_boxes[i]
gt_num = len(gt)
gt_ignored_num = len(gt_ignored)
pred_num = len(pred)
hit_num = 0
# get gt polygons.
gt_all = gt + gt_ignored
gt_polys = [eval_utils.points2polygon(p) for p in gt_all]
gt_ignored_index = [gt_num + i for i in range(len(gt_ignored))]
gt_num = len(gt_polys)
pred_polys, _, pred_ignored_index = eval_utils.ignore_pred(
pred, gt_ignored_index, gt_polys, precision_thr)
# match.
if gt_num > 0 and pred_num > 0:
sz = [gt_num, pred_num]
iou_mat = np.zeros(sz)
gt_hit = np.zeros(gt_num, np.int8)
pred_hit = np.zeros(pred_num, np.int8)
for gt_id in range(gt_num):
for pred_id in range(pred_num):
gt_pol = gt_polys[gt_id]
det_pol = pred_polys[pred_id]
iou_mat[gt_id,
pred_id] = eval_utils.poly_iou(det_pol, gt_pol)
for gt_id in range(gt_num):
for pred_id in range(pred_num):
if (gt_hit[gt_id] != 0 or pred_hit[pred_id] != 0
or gt_id in gt_ignored_index
or pred_id in pred_ignored_index):
continue
if iou_mat[gt_id, pred_id] > iou_thr:
gt_hit[gt_id] = 1
pred_hit[pred_id] = 1
hit_num += 1
gt_care_number = gt_num - gt_ignored_num
pred_care_number = pred_num - len(pred_ignored_index)
r, p, h = eval_utils.compute_hmean(hit_num, hit_num, gt_care_number,
pred_care_number)
img_results.append({'recall': r, 'precision': p, 'hmean': h})
dataset_hit_num += hit_num
dataset_gt_num += gt_care_number
dataset_pred_num += pred_care_number
dataset_r, dataset_p, dataset_h = eval_utils.compute_hmean(
dataset_hit_num, dataset_hit_num, dataset_gt_num, dataset_pred_num)
dataset_results = {
'num_gts': dataset_gt_num,
'num_dets': dataset_pred_num,
'num_match': dataset_hit_num,
'recall': dataset_r,
'precision': dataset_p,
'hmean': dataset_h
}
return dataset_results, img_results
27
mmocr/core/evaluation/kie_metric.py 100644
@ -0,0 +1,27 @@
import torch
def compute_f1_score(preds, gts, ignores=[]):
    """Compute the F1-score of prediction.

    Args:
        preds (Tensor): The predicted probability NxC map
            with N and C being the sample number and class
            number respectively.
        gts (Tensor): The ground truth vector of size N.
        ignores (list): The index set of classes that are ignored when
            reporting results.
            Note: all samples participate in the computation.

    Returns:
        The numpy list of f1-scores of valid classes.
    """
    C = preds.size(1)
    classes = torch.LongTensor(sorted(set(range(C)) - set(ignores)))
    hist = torch.bincount(
        gts * C + preds.argmax(1), minlength=C**2).view(C, C).float()
    diag = torch.diag(hist)
    recalls = diag / hist.sum(1).clamp(min=1)
    precisions = diag / hist.sum(0).clamp(min=1)
    f1 = 2 * recalls * precisions / (recalls + precisions).clamp(min=1e-8)
    return f1[classes].cpu().numpy()
Some files were not shown because too many files have changed in this diff.