From 2e27f8b678a93c0137139874975192bbed37527c Mon Sep 17 00:00:00 2001
From: CSH <40987381+csatsurnh@users.noreply.github.com>
Date: Wed, 15 Feb 2023 19:02:00 +0800
Subject: [PATCH] [Enhancement]Replace numpy ascontiguousarray with torch
 contiguous to speed-up (#2604)

## Motivation

This change was originally motivated by [MMDetection PR
#9533](https://github.com/open-mmlab/mmdetection/pull/9533).

After several experiments I found that if an ndarray is already
C-contiguous, numpy.transpose followed by torch.contiguous performs
better, while if it is not, numpy.transpose followed by
numpy.ascontiguousarray performs better.

## Modification

Replace numpy.ascontiguousarray with torch.contiguous in
[PackSegInputs](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/datasets/transforms/formatting.py)
when the input image is already C-contiguous; otherwise keep using
numpy.ascontiguousarray.

Co-authored-by: MeowZheng <meowzheng@outlook.com>
---
 mmseg/datasets/transforms/formatting.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/mmseg/datasets/transforms/formatting.py b/mmseg/datasets/transforms/formatting.py
index f4018f788..4391161df 100644
--- a/mmseg/datasets/transforms/formatting.py
+++ b/mmseg/datasets/transforms/formatting.py
@@ -63,8 +63,12 @@ class PackSegInputs(BaseTransform):
             img = results['img']
             if len(img.shape) < 3:
                 img = np.expand_dims(img, -1)
-            img = np.ascontiguousarray(img.transpose(2, 0, 1))
-            packed_results['inputs'] = to_tensor(img)
+            if not img.flags.c_contiguous:
+                img = to_tensor(np.ascontiguousarray(img.transpose(2, 0, 1)))
+            else:
+                img = img.transpose(2, 0, 1)
+                img = to_tensor(img).contiguous()
+            packed_results['inputs'] = img
 
         data_sample = SegDataSample()
         if 'gt_seg_map' in results: