# Copyright (c) OpenMMLab. All rights reserved.
# Voxel size shared by the voxel layer and the voxel encoder of `model`.
voxel_size = [0.16, 0.16, 4]

# 'VoxelNet' detector config assembled from a PillarFeatureNet voxel encoder,
# a PointPillarsScatter middle encoder, a SECOND backbone, a SECONDFPN neck
# and an Anchor3DHead with three anchor ranges/sizes.
model = dict(
    type='VoxelNet',
    voxel_layer=dict(
        max_num_points=32,  # max_points_per_voxel
        point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1],
        voxel_size=voxel_size,
        max_voxels=(16000, 40000)  # (training, testing) max_voxels
    ),
    voxel_encoder=dict(
        type='PillarFeatureNet',
        in_channels=4,
        feat_channels=[64],
        with_distance=False,
        voxel_size=voxel_size,
        point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1]),
    # [496, 432] equals the y / x extent of point_cloud_range divided by the
    # voxel size (79.36 / 0.16 and 69.12 / 0.16).
    middle_encoder=dict(
        type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]),
    backbone=dict(
        type='SECOND',
        in_channels=64,
        layer_nums=[3, 5, 5],
        layer_strides=[2, 2, 2],
        out_channels=[64, 128, 256]),
    neck=dict(
        type='SECONDFPN',
        in_channels=[64, 128, 256],
        upsample_strides=[1, 2, 4],
        out_channels=[128, 128, 128]),
    test_cfg=dict(
        use_rotate_nms=True,
        nms_across_levels=False,
        nms_thr=0.01,
        score_thr=0.1,
        min_bbox_size=0,
        nms_pre=100,
        max_num=50),
    bbox_head=dict(
        type='Anchor3DHead',
        num_classes=3,
        # 384 == 3 * 128, i.e. the sum of the neck's out_channels.
        in_channels=384,
        feat_channels=384,
        use_direction_classifier=True,
        anchor_generator=dict(
            type='AlignedAnchor3DRangeGenerator',
            ranges=[
                [0, -39.68, -0.6, 69.12, 39.68, -0.6],
                [0, -39.68, -0.6, 69.12, 39.68, -0.6],
                [0, -39.68, -1.78, 69.12, 39.68, -1.78],
            ],
            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
            rotations=[0, 1.57],
            reshape_out=False),
        diff_rad_by_sin=True,
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
|
|
# Range list handed to the range-filter transforms that reference
# `point_cloud_range`; same values as the literal used inside `model`.
point_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]

# dataset settings
data_root = 'tests/test_codebase/test_mmdet3d/data/kitti/'
dataset_type = 'KittiDataset'
class_names = ['Pedestrian', 'Cyclist', 'Car']
input_modality = dict(use_lidar=True, use_camera=False)
|
|
# PointPillars adopts different sampling strategies among classes:
# `filter_by_min_points` and `sample_groups` carry one entry per class name.
db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'kitti_dbinfos_train.pkl',
    rate=1.0,
    prepare=dict(
        filter_by_difficulty=[-1],
        filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
    classes=class_names,
    sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10))
|
|
# Ordered list of transform configs for training samples: loading,
# database sampling, augmentation, range filtering, shuffling, formatting.
train_pipeline = [
    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(type='ObjectSample', db_sampler=db_sampler),
    dict(
        type='ObjectNoise',
        num_try=100,
        translation_std=[0.25, 0.25, 0.25],
        global_rot_range=[0.0, 0.0],
        rot_range=[-0.15707963267, 0.15707963267]),  # about +/- pi / 20
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.78539816, 0.78539816],  # about +/- pi / 4
        scale_ratio_range=[0.95, 1.05]),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
|
|
# Ordered list of transform configs for test samples. The nested transforms
# use identity settings (zero rotation/translation, unit scale, flip=False)
# and collect only 'points'.
test_pipeline = [
    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type='RandomFlip3D'),
            dict(
                type='PointsRangeFilter', point_cloud_range=point_cloud_range),
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            dict(type='Collect3D', keys=['points'])
        ])
]
|
|
# Dataset split configs. Only `test` is fully specified (type, root,
# annotation file, modality); `train` and `val` carry just the pipeline,
# class names and box type.
data = dict(
    train=dict(
        dataset=dict(
            pipeline=train_pipeline, classes=class_names,
            box_type_3d='LiDAR')),
    val=dict(pipeline=test_pipeline, classes=class_names, box_type_3d='LiDAR'),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'kitti_infos_val.pkl',
        split='training',
        pts_prefix='velodyne_reduced',
        pipeline=test_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=True,
        box_type_3d='LiDAR'))
|
|
|
|
# Rebinds `point_cloud_range` for `centerpoint_model`. Objects built earlier
# in the file keep the list they were given; only the name is rebound.
point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]

# Config with 'pts_'-prefixed components: PillarFeatureNet encoder,
# PointPillarsScatter, SECOND backbone, SECONDFPN neck and a CenterHead
# with six task groups (ten class names in total).
centerpoint_model = dict(
    pts_voxel_layer=dict(
        max_num_points=20,
        # NOTE(review): this is the module-level `voxel_size` bound earlier
        # in the file ([0.16, 0.16, 4] at this point), whereas the voxel
        # encoder below hard-codes (0.2, 0.2, 8) and 102.4 / 0.2 == 512
        # matches output_shape / grid_size. Confirm the mismatch is intended.
        voxel_size=voxel_size,
        max_voxels=(30000, 40000),
        point_cloud_range=point_cloud_range),
    pts_voxel_encoder=dict(
        type='PillarFeatureNet',
        in_channels=4,
        feat_channels=[64],
        with_distance=False,
        voxel_size=(0.2, 0.2, 8),
        norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
        legacy=False),
    pts_middle_encoder=dict(
        type='PointPillarsScatter', in_channels=64, output_shape=(512, 512)),
    pts_backbone=dict(
        type='SECOND',
        in_channels=64,
        out_channels=[64, 128, 256],
        layer_nums=[3, 5, 5],
        layer_strides=[2, 2, 2],
        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
        conv_cfg=dict(type='Conv2d', bias=False)),
    pts_neck=dict(
        type='SECONDFPN',
        in_channels=[64, 128, 256],
        out_channels=[128, 128, 128],
        upsample_strides=[0.5, 1, 2],
        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
        upsample_cfg=dict(type='deconv', bias=False),
        use_conv_for_no_stride=True),
    pts_bbox_head=dict(
        type='CenterHead',
        # Same three values as the neck's out_channels (sum is 384).
        in_channels=sum([128, 128, 128]),
        tasks=[
            dict(num_class=1, class_names=['car']),
            dict(num_class=2, class_names=['truck', 'construction_vehicle']),
            dict(num_class=2, class_names=['bus', 'trailer']),
            dict(num_class=1, class_names=['barrier']),
            dict(num_class=2, class_names=['motorcycle', 'bicycle']),
            dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),
        ],
        common_heads=dict(
            reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),
        share_conv_channel=64,
        bbox_coder=dict(
            type='CenterPointBBoxCoder',
            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            # First two entries of the range list: [-51.2, -51.2].
            pc_range=point_cloud_range[:2],
            max_num=500,
            score_threshold=0.1,
            out_size_factor=4,
            voxel_size=voxel_size[:2],
            code_size=9),
        separate_head=dict(
            type='SeparateHead', init_bias=-2.19, final_kernel=3),
        loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),
        norm_bbox=True),
    # model training and testing settings
    train_cfg=dict(
        pts=dict(
            grid_size=[512, 512, 1],
            voxel_size=voxel_size,
            out_size_factor=4,
            dense_reg=1,
            gaussian_overlap=0.1,
            max_objs=500,
            min_radius=2,
            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])),
    test_cfg=dict(
        pts=dict(
            post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            max_per_img=500,
            max_pool_nms=False,
            # Six entries, the same count as `tasks` in the head.
            min_radius=[4, 12, 10, 1, 0.85, 0.175],
            pc_range=point_cloud_range[:2],
            score_threshold=0.1,
            out_size_factor=4,
            voxel_size=voxel_size[:2],
            nms_type='circle',
            pre_max_size=1000,
            post_max_size=83,
            nms_thr=0.2)))
|
|
# Rebinds `voxel_size` for `pointpillars_nus_model`. Objects built earlier in
# the file keep the list they were given; only the name is rebound.
voxel_size = [0.25, 0.25, 8]

# Config with 'pts_'-prefixed components: HardVFE voxel encoder,
# PointPillarsScatter, SECOND backbone, FPN neck and a ten-class
# Anchor3DHead.
pointpillars_nus_model = dict(
    pts_voxel_layer=dict(
        max_num_points=64,
        point_cloud_range=[-50, -50, -5, 50, 50, 3],
        voxel_size=voxel_size,
        max_voxels=(30000, 40000)),
    pts_voxel_encoder=dict(
        type='HardVFE',
        in_channels=4,
        feat_channels=[64, 64],
        with_distance=False,
        voxel_size=voxel_size,
        with_cluster_center=True,
        with_voxel_center=True,
        point_cloud_range=[-50, -50, -5, 50, 50, 3],
        norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
    # [400, 400] equals the 100-wide x / y range divided by the 0.25 voxel
    # size.
    pts_middle_encoder=dict(
        type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]),
    pts_backbone=dict(
        type='SECOND',
        in_channels=64,
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        layer_nums=[3, 5, 5],
        layer_strides=[2, 2, 2],
        out_channels=[64, 128, 256]),
    pts_neck=dict(
        type='FPN',
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        act_cfg=dict(type='ReLU'),
        in_channels=[64, 128, 256],
        out_channels=256,
        start_level=0,
        num_outs=3),
    pts_bbox_head=dict(
        type='Anchor3DHead',
        num_classes=10,
        in_channels=256,
        feat_channels=256,
        use_direction_classifier=True,
        anchor_generator=dict(
            type='AlignedAnchor3DRangeGenerator',
            ranges=[[-50, -50, -1.8, 50, 50, -1.8]],
            scales=[1, 2, 4],
            sizes=[
                [2.5981, 0.8660, 1.],  # 1.5 / sqrt(3)
                [1.7321, 0.5774, 1.],  # 1 / sqrt(3)
                [1., 1., 1.],
                [0.4, 0.4, 1],
            ],
            custom_values=[0, 0],
            rotations=[0, 1.57],
            reshape_out=True),
        assigner_per_size=False,
        diff_rad_by_sin=True,
        dir_offset=-0.7854,  # -pi / 4
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
    # model training and testing settings
    train_cfg=dict(
        pts=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                iou_calculator=dict(type='BboxOverlapsNearest3D'),
                pos_iou_thr=0.6,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                ignore_iof_thr=-1),
            allowed_border=0,
            # Nine weights, the same count as the bbox coder's code_size.
            code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        pts=dict(
            use_rotate_nms=True,
            nms_across_levels=False,
            nms_pre=1000,
            nms_thr=0.2,
            score_thr=0.05,
            min_bbox_size=0,
            max_num=500)))
|