mmocr/tools/dist_train.sh
Tong Gao 838aa47f9f
[Feature] Add args and docs for multi-machine training/testing (#849)
* add args and docs for multi-machine training/testing

* update docs

* update docs

* update docs

* update docs

* revert commit
2022-03-22 16:25:39 +08:00

34 lines
733 B
Bash
Executable File

#!/usr/bin/env bash
# Distributed training launcher for mmocr.
# Usage: bash dist_train.sh CONFIG WORK_DIR GPUS [extra train.py args...]
# Optional env vars: NNODES, NODE_RANK, PORT, MASTER_ADDR (multi-machine).
if [ $# -lt 3 ]; then
  # Misuse: print usage to stderr and exit non-zero (bare `exit` returned 0).
  echo "Usage: bash $0 CONFIG WORK_DIR GPUS" >&2
  exit 1
fi
# Required positional arguments.
CONFIG=$1   # model config file passed through to train.py
WORK_DIR=$2 # output directory for logs and checkpoints
GPUS=$3     # number of GPUs to use on this node

# Multi-machine settings; each may be overridden via the environment.
: "${NNODES:=1}"              # total number of participating nodes
: "${NODE_RANK:=0}"           # rank of this node within the job
: "${PORT:=29500}"            # rendezvous port on the master node
: "${MASTER_ADDR:=127.0.0.1}" # address of the master node
# Make the repo root importable so train.py can find the mmocr package.
# NOTE: the original ended this assignment with a trailing `\`, splicing it
# onto the `if` line; `if` is not a reserved word after an assignment prefix,
# so bash raised a syntax error. Assign and export on a standalone statement.
PYTHONPATH="$(dirname "$0")/..":${PYTHONPATH:-}
export PYTHONPATH

if [ "${GPUS}" -eq 1 ]; then
  # Single GPU: plain non-distributed training; forward extra args verbatim.
  python "$(dirname "$0")/train.py" "$CONFIG" --work-dir="${WORK_DIR}" "${@:4}"
else
  # Multi-GPU (possibly multi-node) training via torch.distributed.launch.
  python -m torch.distributed.launch \
    --nnodes="$NNODES" \
    --node_rank="$NODE_RANK" \
    --master_addr="$MASTER_ADDR" \
    --nproc_per_node="$GPUS" \
    --master_port="$PORT" \
    "$(dirname "$0")/train.py" \
    "$CONFIG" \
    --seed 0 \
    --work-dir="${WORK_DIR}" \
    --launcher pytorch "${@:4}"
fi