# encoding: utf-8
"""
@author:  xingyu liao
@contact: sherlockliao01@gmail.com

Custom calibrator used to calibrate an INT8 TensorRT model.
It overrides the required methods of trt.IInt8EntropyCalibrator2:
get_batch_size, get_batch, read_calibration_cache and write_calibration_cache.
"""

# based on:
# https://github.com/qq995431104/Pytorch2TensorRT/blob/master/myCalibrator.py

import logging
import os
import sys

import numpy as np
import pycuda.autoinit  # noqa: F401  (initializes the CUDA context)
import pycuda.driver as cuda
import tensorrt as trt
import torchvision.transforms as T

sys.path.append('../..')

from fastreid.data.build import _root
from fastreid.data.data_utils import read_image
from fastreid.data.datasets import DATASET_REGISTRY
from fastreid.data.transforms import ToTensor

logger = logging.getLogger('trt_export.calibrator')


class FeatEntropyCalibrator(trt.IInt8EntropyCalibrator2):

    def __init__(self, args):
        trt.IInt8EntropyCalibrator2.__init__(self)

        self.cache_file = 'reid_feat.cache'

        self.batch_size = args.batch_size
        self.channel = args.channel
        self.height = args.height
        self.width = args.width
        self.transform = T.Compose([
            T.Resize((self.height, self.width), interpolation=3),  # (h, w), bicubic
            ToTensor(),
        ])

        dataset = DATASET_REGISTRY.get(args.calib_data)(root=_root)
        self._data_items = dataset.train + dataset.query + dataset.gallery
        np.random.shuffle(self._data_items)
        self.imgs = [item[0] for item in self._data_items]

        self.batch_idx = 0
        self.max_batch_idx = len(self.imgs) // self.batch_size

        # Device buffer large enough for one float32 batch in NCHW layout.
        self.data_size = self.batch_size * self.channel * self.height * self.width * trt.float32.itemsize
        self.device_input = cuda.mem_alloc(self.data_size)

    def next_batch(self):
        if self.batch_idx < self.max_batch_idx:
            batch_files = self.imgs[self.batch_idx * self.batch_size:
                                    (self.batch_idx + 1) * self.batch_size]
            batch_imgs = np.zeros((self.batch_size, self.channel, self.height, self.width),
                                  dtype=np.float32)
            for i, f in enumerate(batch_files):
                img = read_image(f)
                img = self.transform(img).numpy()
                assert img.nbytes == self.data_size // self.batch_size, 'not valid img! ' + f
                batch_imgs[i] = img
            self.batch_idx += 1
            logger.info("batch:[{}/{}]".format(self.batch_idx, self.max_batch_idx))
            return np.ascontiguousarray(batch_imgs)
        else:
            return np.array([])

    def get_batch_size(self):
        return self.batch_size

    def get_batch(self, names, p_str=None):
        try:
            batch_imgs = self.next_batch()
            batch_imgs = batch_imgs.ravel()
            if batch_imgs.size == 0 or batch_imgs.size != self.batch_size * self.channel * self.height * self.width:
                return None
            cuda.memcpy_htod(self.device_input, batch_imgs.astype(np.float32))
            return [int(self.device_input)]
        except Exception:
            logger.exception("Failed to fetch calibration batch")
            return None

    def read_calibration_cache(self):
        # If there is a cache, use it instead of calibrating again. Otherwise, implicitly return None.
        if os.path.exists(self.cache_file):
            with open(self.cache_file, "rb") as f:
                return f.read()

    def write_calibration_cache(self, cache):
        with open(self.cache_file, "wb") as f:
            f.write(cache)
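

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original fastreid code): one way
# the calibrator could be plugged into a TensorRT builder config when building
# an INT8 engine from an ONNX model. `onnx_file`, `engine_file` and `args` are
# hypothetical placeholders; the `build_engine` call shown is the pre-TensorRT
# 8.x style that matches the pycuda/trt APIs used above and may be deprecated
# in newer releases.
# ---------------------------------------------------------------------------
def build_int8_engine(onnx_file, engine_file, args):
    """Sketch: parse an ONNX model and build an INT8 engine with FeatEntropyCalibrator."""
    trt_logger = trt.Logger(trt.Logger.INFO)
    builder = trt.Builder(trt_logger)
    network = builder.create_network(
        1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    parser = trt.OnnxParser(network, trt_logger)

    with open(onnx_file, 'rb') as f:
        if not parser.parse(f.read()):
            raise RuntimeError('Failed to parse ONNX file: ' + onnx_file)

    config = builder.create_builder_config()
    # Enable INT8 mode and attach the entropy calibrator defined above.
    config.set_flag(trt.BuilderFlag.INT8)
    config.int8_calibrator = FeatEntropyCalibrator(args)

    engine = builder.build_engine(network, config)
    with open(engine_file, 'wb') as f:
        f.write(engine.serialize())
    return engine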