diff --git a/ppcls/configs/MultiLabelCOCO/MLDecoder/CLIP_vit_base_patch16_448_ml_decoder_448.yaml b/ppcls/configs/MultiLabelCOCO/MLDecoder/CLIP_vit_base_patch16_448_ml_decoder_448.yaml new file mode 100644 index 000000000..debbdc4c5 --- /dev/null +++ b/ppcls/configs/MultiLabelCOCO/MLDecoder/CLIP_vit_base_patch16_448_ml_decoder_448.yaml @@ -0,0 +1,172 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 10 + eval_during_train: True + eval_interval: 1 + epochs: 40 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 448, 448] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + use_multilabel: True + +# model ema +EMA: + decay: 0.9997 + +# mixed precision +AMP: + use_amp: False + use_fp16_test: False + scale_loss: 128.0 + use_dynamic_loss_scaling: True + use_promote: False + # O1: mixed fp16, O2: pure fp16 + level: O2 + +# model architecture +Arch: + name: CLIP_vit_base_patch16_224 + class_num: 80 + return_embed: False + use_fused_attn: False # fused attn can be used in AMP O2 mode only + pretrained: True + use_ml_decoder: True + +# ml-decoder head +MLDecoder: + query_num: 80 # default: 80, query_num <= class_num + in_channels: 768 + remove_layers: [] + replace_layer: 'head' + +# loss function config for training/eval process +Loss: + Train: + - MultiLabelAsymmetricLoss: + weight: 1.0 + gamma_pos: 0 + gamma_neg: 4 + clip: 0.05 + disable_focal_loss_grad: True + + Eval: + - MultiLabelAsymmetricLoss: + weight: 1.0 + gamma_pos: 0 + gamma_neg: 4 + clip: 0.05 + disable_focal_loss_grad: True + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 1e-4 + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 5e-5 + eta_min: 1e-10 + warmup_epoch: 5 + warmup_start_lr: 1e-6 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: MultiLabelDataset + image_root: dataset/coco_ml/images + cls_label_path: dataset/coco_ml/train.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 448 + interpolation: bilinear + backend: pil + - Cutout: + length: 224 + fill_value: none + - RandAugmentV4: + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 16 + drop_last: False + shuffle: True + loader: + num_workers: 8 + use_shared_memory: True + + Eval: + dataset: + name: MultiLabelDataset + image_root: dataset/coco_ml/images + cls_label_path: dataset/coco_ml/val.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 448 + interpolation: bilinear + backend: pil + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 8 + drop_last: False + shuffle: False + loader: + num_workers: 8 + use_shared_memory: True + +Infer: + infer_imgs: deploy/images/coco_000000570688.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 448 + interpolation: bilinear + backend: pil + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: MultiLabelThreshOutput + threshold: 0.5 + class_id_map_file: dataset/coco_ml/label.txt + +Metric: + Train: + Eval: + - MultiLabelMAP: + # support list: integral, 11point + # default: integral + map_type: integral diff --git a/ppcls/configs/MultiLabelCOCO/MLDecoder/PP-HGNetV2-B0_ml_decoder_448.yaml b/ppcls/configs/MultiLabelCOCO/MLDecoder/PP-HGNetV2-B0_ml_decoder_448.yaml new file mode 100644 index 000000000..4cb9faf4c --- /dev/null +++ b/ppcls/configs/MultiLabelCOCO/MLDecoder/PP-HGNetV2-B0_ml_decoder_448.yaml @@ -0,0 +1,168 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 10 + eval_during_train: True + eval_interval: 1 + epochs: 40 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 448, 448] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + use_multilabel: True + +# model ema +EMA: + decay: 0.9997 + +# mixed precision +AMP: + use_amp: True + use_fp16_test: False + scale_loss: 128.0 + use_dynamic_loss_scaling: True + use_promote: False + # O1: mixed fp16, O2: pure fp16 + level: O1 + +# model architecture +Arch: + name: PPHGNetV2_B0 + class_num: 80 + pretrained: True # ssld pretrained + use_ml_decoder: True + +# ml-decoder head +MLDecoder: + query_num: 80 # default: 80, query_num <= class_num + in_channels: 2048 + +# loss function config for training/eval process +Loss: + Train: + - MultiLabelAsymmetricLoss: + weight: 1.0 + gamma_pos: 0 + gamma_neg: 4 + clip: 0.05 + disable_focal_loss_grad: True + + Eval: + - MultiLabelAsymmetricLoss: + weight: 1.0 + gamma_pos: 0 + gamma_neg: 4 + clip: 0.05 + disable_focal_loss_grad: True + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 1e-4 + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 1e-4 + eta_min: 1e-10 + warmup_epoch: 5 + warmup_start_lr: 1e-6 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: MultiLabelDataset + image_root: dataset/coco_ml/images + cls_label_path: dataset/coco_ml/train.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 448 + interpolation: bilinear + backend: pil + - Cutout: + length: 224 + fill_value: none + - RandAugmentV4: + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 8 + use_shared_memory: True + + Eval: + dataset: + name: MultiLabelDataset + image_root: dataset/coco_ml/images + cls_label_path: dataset/coco_ml/val.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 448 + interpolation: bilinear + backend: pil + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 32 + drop_last: False + shuffle: False + loader: + num_workers: 8 + use_shared_memory: True + +Infer: + infer_imgs: deploy/images/coco_000000570688.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 448 + interpolation: bilinear + backend: pil + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: MultiLabelThreshOutput + threshold: 0.5 + class_id_map_file: dataset/coco_ml/label.txt + +Metric: + Train: + Eval: + - MultiLabelMAP: + # support list: integral, 11point + # default: integral + map_type: integral diff --git a/ppcls/configs/MultiLabelCOCO/MLDecoder/PP-HGNetV2-B4_ml_decoder_448.yaml b/ppcls/configs/MultiLabelCOCO/MLDecoder/PP-HGNetV2-B4_ml_decoder_448.yaml new file mode 100644 index 000000000..1e0e9ab5c --- /dev/null +++ b/ppcls/configs/MultiLabelCOCO/MLDecoder/PP-HGNetV2-B4_ml_decoder_448.yaml @@ -0,0 +1,168 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 10 + eval_during_train: True + eval_interval: 1 + epochs: 40 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 448, 448] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + use_multilabel: True + +# model ema +EMA: + decay: 0.9997 + +# mixed precision +AMP: + use_amp: True + use_fp16_test: False + scale_loss: 128.0 + use_dynamic_loss_scaling: True + use_promote: False + # O1: mixed fp16, O2: pure fp16 + level: O1 + +# model architecture +Arch: + name: PPHGNetV2_B4 + class_num: 80 + pretrained: True # ssld pretrained + use_ml_decoder: True + +# ml-decoder head +MLDecoder: + query_num: 80 # default: 80, query_num <= class_num + in_channels: 2048 + +# loss function config for training/eval process +Loss: + Train: + - MultiLabelAsymmetricLoss: + weight: 1.0 + gamma_pos: 0 + gamma_neg: 4 + clip: 0.05 + disable_focal_loss_grad: True + + Eval: + - MultiLabelAsymmetricLoss: + weight: 1.0 + gamma_pos: 0 + gamma_neg: 4 + clip: 0.05 + disable_focal_loss_grad: True + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 1e-4 + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 1e-4 + eta_min: 1e-10 + warmup_epoch: 5 + warmup_start_lr: 1e-6 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: MultiLabelDataset + image_root: dataset/coco_ml/images + cls_label_path: dataset/coco_ml/train.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 448 + interpolation: bilinear + backend: pil + - Cutout: + length: 224 + fill_value: none + - RandAugmentV4: + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 8 + use_shared_memory: True + + Eval: + dataset: + name: MultiLabelDataset + image_root: dataset/coco_ml/images + cls_label_path: dataset/coco_ml/val.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 448 + interpolation: bilinear + backend: pil + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 16 + drop_last: False + shuffle: False + loader: + num_workers: 8 + use_shared_memory: True + +Infer: + infer_imgs: deploy/images/coco_000000570688.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 448 + interpolation: bilinear + backend: pil + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: MultiLabelThreshOutput + threshold: 0.5 + class_id_map_file: dataset/coco_ml/label.txt + +Metric: + Train: + Eval: + - MultiLabelMAP: + # support list: integral, 11point + # default: integral + map_type: integral diff --git a/ppcls/configs/MultiLabelCOCO/MLDecoder/PP-HGNetV2-B6_ml_decoder_448.yaml b/ppcls/configs/MultiLabelCOCO/MLDecoder/PP-HGNetV2-B6_ml_decoder_448.yaml new file mode 100644 index 000000000..a4caffb1d --- /dev/null +++ b/ppcls/configs/MultiLabelCOCO/MLDecoder/PP-HGNetV2-B6_ml_decoder_448.yaml @@ -0,0 +1,168 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 10 + eval_during_train: True + eval_interval: 1 + epochs: 40 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 448, 448] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + use_multilabel: True + +# model ema +EMA: + decay: 0.9997 + +# mixed precision +AMP: + use_amp: True + use_fp16_test: False + scale_loss: 128.0 + use_dynamic_loss_scaling: True + use_promote: False + # O1: mixed fp16, O2: pure fp16 + level: O1 + +# model architecture +Arch: + name: PPHGNetV2_B6 + class_num: 80 + pretrained: True # ssld pretrained + use_ml_decoder: True + +# ml-decoder head +MLDecoder: + query_num: 80 # default: 80, query_num <= class_num + in_channels: 2048 + +# loss function config for training/eval process +Loss: + Train: + - MultiLabelAsymmetricLoss: + weight: 1.0 + gamma_pos: 0 + gamma_neg: 4 + clip: 0.05 + disable_focal_loss_grad: True + + Eval: + - MultiLabelAsymmetricLoss: + weight: 1.0 + gamma_pos: 0 + gamma_neg: 4 + clip: 0.05 + disable_focal_loss_grad: True + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 1e-4 + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 1e-4 + eta_min: 1e-10 + warmup_epoch: 5 + warmup_start_lr: 1e-6 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: MultiLabelDataset + image_root: dataset/coco_ml/images + cls_label_path: dataset/coco_ml/train.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 448 + interpolation: bilinear + backend: pil + - Cutout: + length: 224 + fill_value: none + - RandAugmentV4: + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 32 + drop_last: False + shuffle: True + loader: + num_workers: 8 + use_shared_memory: True + + Eval: + dataset: + name: MultiLabelDataset + image_root: dataset/coco_ml/images + cls_label_path: dataset/coco_ml/val.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 448 + interpolation: bilinear + backend: pil + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 8 + drop_last: False + shuffle: False + loader: + num_workers: 8 + use_shared_memory: True + +Infer: + infer_imgs: deploy/images/coco_000000570688.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 448 + interpolation: bilinear + backend: pil + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: MultiLabelThreshOutput + threshold: 0.5 + class_id_map_file: dataset/coco_ml/label.txt + +Metric: + Train: + Eval: + - MultiLabelMAP: + # support list: integral, 11point + # default: integral + map_type: integral diff --git a/ppcls/configs/MultiLabelCOCO/MLDecoder/PP-LCNet_x1_0_ml_decoder_448.yaml b/ppcls/configs/MultiLabelCOCO/MLDecoder/PP-LCNet_x1_0_ml_decoder_448.yaml new file mode 100644 index 000000000..d3a8011d2 --- /dev/null +++ b/ppcls/configs/MultiLabelCOCO/MLDecoder/PP-LCNet_x1_0_ml_decoder_448.yaml @@ -0,0 +1,170 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 10 + eval_during_train: True + eval_interval: 1 + epochs: 40 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 448, 448] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + use_multilabel: True + +# model ema +EMA: + decay: 0.9997 + +# mixed precision +AMP: + use_amp: True + use_fp16_test: False + scale_loss: 128.0 + use_dynamic_loss_scaling: True + use_promote: False + # O1: mixed fp16, O2: pure fp16 + level: O1 + +# model architecture +Arch: + name: PPLCNet_x1_0 + class_num: 80 + pretrained: True + use_ml_decoder: True + +# ml-decoder head +MLDecoder: + query_num: 80 # default: 80, query_num <= class_num + class_num: 80 + in_channels: 1280 + + +# loss function config for training/eval process +Loss: + Train: + - MultiLabelAsymmetricLoss: + weight: 1.0 + gamma_pos: 0 + gamma_neg: 4 + clip: 0.05 + disable_focal_loss_grad: True + + Eval: + - MultiLabelAsymmetricLoss: + weight: 1.0 + gamma_pos: 0 + gamma_neg: 4 + clip: 0.05 + disable_focal_loss_grad: True + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 1e-4 + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 1e-4 + eta_min: 1e-10 + warmup_epoch: 5 + warmup_start_lr: 1e-6 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: MultiLabelDataset + image_root: dataset/coco_ml/images + cls_label_path: dataset/coco_ml/train.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 448 + interpolation: bilinear + backend: pil + - Cutout: + length: 224 + fill_value: none + - RandAugmentV4: + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 8 + use_shared_memory: True + + Eval: + dataset: + name: MultiLabelDataset + image_root: dataset/coco_ml/images + cls_label_path: dataset/coco_ml/val.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 448 + interpolation: bilinear + backend: pil + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 32 + drop_last: False + shuffle: False + loader: + num_workers: 8 + use_shared_memory: True + +Infer: + infer_imgs: deploy/images/coco_000000570688.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 448 + interpolation: bilinear + backend: pil + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: MultiLabelThreshOutput + threshold: 0.5 + class_id_map_file: dataset/coco_ml/label.txt + +Metric: + Train: + Eval: + - MultiLabelMAP: + # support list: integral, 11point + # default: integral + map_type: integral diff --git a/ppcls/configs/MultiLabelCOCO/MLDecoder/ResNet50_ml_decoder_448.yaml b/ppcls/configs/MultiLabelCOCO/MLDecoder/ResNet50_ml_decoder_448.yaml new file mode 100644 index 000000000..3f9e9a800 --- /dev/null +++ b/ppcls/configs/MultiLabelCOCO/MLDecoder/ResNet50_ml_decoder_448.yaml @@ -0,0 +1,168 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 10 + eval_during_train: True + eval_interval: 1 + epochs: 40 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 448, 448] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + use_multilabel: True + +# model ema +EMA: + decay: 0.9997 + +# mixed precision +AMP: + use_amp: True + use_fp16_test: False + scale_loss: 128.0 + use_dynamic_loss_scaling: True + use_promote: False + # O1: mixed fp16, O2: pure fp16 + level: O1 + +# model architecture +Arch: + name: ResNet50 + class_num: 80 + pretrained: True + use_ml_decoder: True + +# ml-decoder head +MLDecoder: + query_num: 80 # default: 80, query_num <= class_num + in_channels: 2048 + +# loss function config for training/eval process +Loss: + Train: + - MultiLabelAsymmetricLoss: + weight: 1.0 + gamma_pos: 0 + gamma_neg: 4 + clip: 0.05 + disable_focal_loss_grad: True + + Eval: + - MultiLabelAsymmetricLoss: + weight: 1.0 + gamma_pos: 0 + gamma_neg: 4 + clip: 0.05 + disable_focal_loss_grad: True + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 1e-4 + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 1e-4 + eta_min: 1e-10 + warmup_epoch: 5 + warmup_start_lr: 1e-6 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: MultiLabelDataset + image_root: dataset/coco_ml/images + cls_label_path: dataset/coco_ml/train.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 448 + interpolation: bilinear + backend: pil + - Cutout: + length: 224 + fill_value: none + - RandAugmentV4: + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 8 + use_shared_memory: True + + Eval: + dataset: + name: MultiLabelDataset + image_root: dataset/coco_ml/images + cls_label_path: dataset/coco_ml/val.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 448 + interpolation: bilinear + backend: pil + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 16 + drop_last: False + shuffle: False + loader: + num_workers: 8 + use_shared_memory: True + +Infer: + infer_imgs: deploy/images/coco_000000570688.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 448 + interpolation: bilinear + backend: pil + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: MultiLabelThreshOutput + threshold: 0.5 + class_id_map_file: dataset/coco_ml/label.txt + +Metric: + Train: + Eval: + - MultiLabelMAP: + # support list: integral, 11point + # default: integral + map_type: integral diff --git a/ppcls/data/preprocess/ops/cutout.py b/ppcls/data/preprocess/ops/cutout.py index 8d0760f1f..7519ce844 100644 --- a/ppcls/data/preprocess/ops/cutout.py +++ b/ppcls/data/preprocess/ops/cutout.py @@ -50,6 +50,6 @@ class Cutout(object): random.randint(0, 255), random.randint(0, 255)] - img = cv2.rectangle(img, (x1, y1), (x2, y2), fill_value, -1) + img = cv2.rectangle(np.array(img), (x1, y1), (x2, y2), fill_value, -1) return img