RE-OWOD/projects/Panoptic-DeepLab/train_net.py

197 lines
6.9 KiB
Python
Raw Normal View History

2022-01-04 13:41:19 +08:00
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
Panoptic-DeepLab Training Script.
This script is a simplified version of the training script in detectron2/tools.
"""
import os
from typing import Any, Dict, List, Set
import torch
import detectron2.data.transforms as T
import detectron2.utils.comm as comm
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog, build_detection_train_loader
from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch
from detectron2.evaluation import (
CityscapesInstanceEvaluator,
CityscapesSemSegEvaluator,
COCOEvaluator,
COCOPanopticEvaluator,
DatasetEvaluators,
)
from detectron2.projects.deeplab import build_lr_scheduler
from detectron2.projects.panoptic_deeplab import (
PanopticDeeplabDatasetMapper,
add_panoptic_deeplab_config,
)
from detectron2.solver.build import maybe_add_gradient_clipping
def build_sem_seg_train_aug(cfg):
augs = [
T.ResizeShortestEdge(
cfg.INPUT.MIN_SIZE_TRAIN, cfg.INPUT.MAX_SIZE_TRAIN, cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING
)
]
if cfg.INPUT.CROP.ENABLED:
augs.append(T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE))
augs.append(T.RandomFlip())
return augs
class Trainer(DefaultTrainer):
"""
We use the "DefaultTrainer" which contains a number pre-defined logic for
standard training workflow. They may not work for you, especially if you
are working on a new research project. In that case you can use the cleaner
"SimpleTrainer", or write your own training loop.
"""
@classmethod
def build_evaluator(cls, cfg, dataset_name, output_folder=None):
"""
Create evaluator(s) for a given dataset.
This uses the special metadata "evaluator_type" associated with each builtin dataset.
For your own dataset, you can simply create an evaluator manually in your
script and do not have to worry about the hacky if-else logic here.
"""
if output_folder is None:
output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
evaluator_list = []
evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
if evaluator_type in ["cityscapes_panoptic_seg", "coco_panoptic_seg"]:
evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder))
if evaluator_type == "cityscapes_panoptic_seg":
assert (
torch.cuda.device_count() >= comm.get_rank()
), "CityscapesEvaluator currently do not work with multiple machines."
evaluator_list.append(CityscapesSemSegEvaluator(dataset_name))
evaluator_list.append(CityscapesInstanceEvaluator(dataset_name))
if evaluator_type == "coco_panoptic_seg":
# Evaluate bbox and segm.
cfg.defrost()
cfg.MODEL.MASK_ON = True
cfg.MODEL.KEYPOINT_ON = False
cfg.freeze()
evaluator_list.append(COCOEvaluator(dataset_name, cfg, True, output_folder))
if len(evaluator_list) == 0:
raise NotImplementedError(
"no Evaluator for the dataset {} with the type {}".format(
dataset_name, evaluator_type
)
)
elif len(evaluator_list) == 1:
return evaluator_list[0]
return DatasetEvaluators(evaluator_list)
@classmethod
def build_train_loader(cls, cfg):
mapper = PanopticDeeplabDatasetMapper(cfg, augmentations=build_sem_seg_train_aug(cfg))
return build_detection_train_loader(cfg, mapper=mapper)
@classmethod
def build_lr_scheduler(cls, cfg, optimizer):
"""
It now calls :func:`detectron2.solver.build_lr_scheduler`.
Overwrite it if you'd like a different scheduler.
"""
return build_lr_scheduler(cfg, optimizer)
@classmethod
def build_optimizer(cls, cfg, model):
"""
Build an optimizer from config.
"""
norm_module_types = (
torch.nn.BatchNorm1d,
torch.nn.BatchNorm2d,
torch.nn.BatchNorm3d,
torch.nn.SyncBatchNorm,
# NaiveSyncBatchNorm inherits from BatchNorm2d
torch.nn.GroupNorm,
torch.nn.InstanceNorm1d,
torch.nn.InstanceNorm2d,
torch.nn.InstanceNorm3d,
torch.nn.LayerNorm,
torch.nn.LocalResponseNorm,
)
params: List[Dict[str, Any]] = []
memo: Set[torch.nn.parameter.Parameter] = set()
for module in model.modules():
for key, value in module.named_parameters(recurse=False):
if not value.requires_grad:
continue
# Avoid duplicating parameters
if value in memo:
continue
memo.add(value)
lr = cfg.SOLVER.BASE_LR
weight_decay = cfg.SOLVER.WEIGHT_DECAY
if isinstance(module, norm_module_types):
weight_decay = cfg.SOLVER.WEIGHT_DECAY_NORM
elif key == "bias":
lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR
weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS
params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}]
optimizer_type = cfg.SOLVER.OPTIMIZER
if optimizer_type == "SGD":
optimizer = torch.optim.SGD(
params,
cfg.SOLVER.BASE_LR,
momentum=cfg.SOLVER.MOMENTUM,
nesterov=cfg.SOLVER.NESTEROV,
)
elif optimizer_type == "ADAM":
optimizer = torch.optim.Adam(params, cfg.SOLVER.BASE_LR)
else:
raise NotImplementedError(f"no optimizer type {optimizer_type}")
optimizer = maybe_add_gradient_clipping(cfg, optimizer)
return optimizer
def setup(args):
"""
Create configs and perform basic setups.
"""
cfg = get_cfg()
add_panoptic_deeplab_config(cfg)
cfg.merge_from_file(args.config_file)
cfg.merge_from_list(args.opts)
cfg.freeze()
default_setup(cfg, args)
return cfg
def main(args):
cfg = setup(args)
if args.eval_only:
model = Trainer.build_model(cfg)
DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
cfg.MODEL.WEIGHTS, resume=args.resume
)
res = Trainer.test(cfg, model)
return res
trainer = Trainer(cfg)
trainer.resume_or_load(resume=args.resume)
return trainer.train()
if __name__ == "__main__":
args = default_argument_parser().parse_args()
print("Command Line Args:", args)
launch(
main,
args.num_gpus,
num_machines=args.num_machines,
machine_rank=args.machine_rank,
dist_url=args.dist_url,
args=(args,),
)