diff --git a/docs/ncnn_deployment.md b/docs/ncnn_deployment.md
index 6f578059f..42b45e8d3 100644
--- a/docs/ncnn_deployment.md
+++ b/docs/ncnn_deployment.md
@@ -66,3 +66,19 @@ Before starting this tutorial, you should make sure that the prerequisites menti
     Undefined symbol: __cpu_model
     ```
     This is a bug of gcc-5, you should update to `gcc >= 6`
+
+## Performance Test
+
+### MMCls
+The table below reports the accuracy of MMClassification models deployed with the ncnn backend.
+
+Dataset: the ImageNet `val` (validation) set.
+
+| Model | Top-1(%) | Top-5(%) |
+|-------|----------|----------|
+| MobileNetV2 | 71.86 (71.86) | 90.42 (90.42) |
+| ResNet | 69.88 (70.07) | 89.34 (89.44) |
+| ResNeXt | 78.61 (78.71) | 94.17 (94.12) |
+
+Values in parentheses are the corresponding PyTorch inference results,
+taken from the [mmcls model_zoo docs](https://github.com/open-mmlab/mmclassification/blob/master/docs/model_zoo.md).
diff --git a/mmdeploy/mmcls/export/model_wrappers.py b/mmdeploy/mmcls/export/model_wrappers.py
index ddc2d01ce..3fc789507 100644
--- a/mmdeploy/mmcls/export/model_wrappers.py
+++ b/mmdeploy/mmcls/export/model_wrappers.py
@@ -107,20 +107,23 @@ class NCNNClassifier(DeployBaseClassifier):
     def forward_test(self, imgs, *args, **kwargs):
         import ncnn
         assert len(imgs.shape) == 4
-        # Only for batch == 1 now.
-        assert imgs.shape[0] == 1
-        input_data = imgs[0].cpu().numpy()
-        input_data = ncnn.Mat(input_data)
-        if self.device_id == -1:
-            ex = self.net.create_extractor()
-            ex.input('input', input_data)
-            ret, results = ex.extract('output')
-            results = np.array(results)
-            assert ret != -100, 'Memory allocation failed in ncnn layers'
-            assert ret == 0
-            return [results]
-        else:
-            raise NotImplementedError('GPU device is not implemented.')
+        batch_size = imgs.shape[0]
+        results_list = []
+        for idx in range(batch_size):
+            input_data = imgs[idx].cpu().numpy()
+            input_data = ncnn.Mat(input_data)
+            if self.device_id == -1:
+                extractor = self.net.create_extractor()
+                extractor.input('input', input_data)
+                return_status, results = extractor.extract('output')
+                results = np.array(results)
+                assert return_status != -100, \
+                    'Memory allocation failed in ncnn layers'
+                assert return_status == 0
+                results_list.append(results)
+            else:
+                raise NotImplementedError('GPU device is not implemented.')
+        return results_list
 
 
 class PPLClassifier(DeployBaseClassifier):
diff --git a/tools/test.py b/tools/test.py
index afb69f07e..c8e67c2ed 100644
--- a/tools/test.py
+++ b/tools/test.py
@@ -14,7 +14,8 @@ def parse_args():
         description='MMDeploy test (and eval) a backend.')
     parser.add_argument('deploy_cfg', help='Deploy config path')
     parser.add_argument('model_cfg', help='Model config path')
-    parser.add_argument('model', help='Input model file.')
+    parser.add_argument(
+        '--model', type=str, nargs='+', help='Input model files.')
     parser.add_argument('--out', help='output result file in pickle format')
     parser.add_argument(
         '--format-only',
@@ -66,7 +67,6 @@ def main():
     args = parse_args()
     if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
         raise ValueError('The output file must be a pkl file.')
-
     deploy_cfg_path = args.deploy_cfg
     model_cfg_path = args.model_cfg
 
@@ -82,13 +82,12 @@ def main():
     # load the model of the backend
     device_id = -1 if args.device == 'cpu' else 0
     backend = deploy_cfg.get('backend', 'default')
-    model = init_backend_model([args.model],
-                               codebase=codebase,
-                               backend=backend,
-                               class_names=get_classes_from_config(
-                                   codebase, model_cfg),
-                               device_id=device_id)
-
+    model = init_backend_model(
+        args.model,
+        codebase=codebase,
+        backend=backend,
+        class_names=get_classes_from_config(codebase, model_cfg),
+        device_id=device_id)
     model = MMDataParallel(model, device_ids=[0])
     outputs = single_gpu_test(codebase, model, data_loader, args.show,
                               args.show_dir, args.show_score_thr)