[v005] set INT8 calibrate set via cmake (#459)

Reviewed by: @L1aoXingyu
pull/468/head
Darren 2021-04-19 09:22:48 +07:00 committed by GitHub
parent e124a9afd3
commit 8276ccf4fd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 120 additions and 92 deletions

3
.gitmodules vendored
View File

@ -1,3 +0,0 @@
[submodule "projects/FastRT/pybind_interface/pybind11"]
path = projects/FastRT/pybind_interface/pybind11
url = https://github.com/pybind/pybind11.git

View File

@ -4,7 +4,7 @@ set(LIBARARY_NAME "FastRT" CACHE STRING "The Fastreid-tensorrt library name")
set(LIBARARY_VERSION_MAJOR "0") set(LIBARARY_VERSION_MAJOR "0")
set(LIBARARY_VERSION_MINOR "0") set(LIBARARY_VERSION_MINOR "0")
set(LIBARARY_VERSION_SINOR "4") set(LIBARARY_VERSION_SINOR "5")
set(LIBARARY_SOVERSION "0") set(LIBARARY_SOVERSION "0")
set(LIBARARY_VERSION "${LIBARARY_VERSION_MAJOR}.${LIBARARY_VERSION_MINOR}.${LIBARARY_VERSION_SINOR}") set(LIBARARY_VERSION "${LIBARARY_VERSION_MAJOR}.${LIBARARY_VERSION_MINOR}.${LIBARARY_VERSION_SINOR}")
project(${LIBARARY_NAME}${LIBARARY_VERSION}) project(${LIBARARY_NAME}${LIBARARY_VERSION})
@ -35,12 +35,19 @@ option(BUILD_INT8 "Build Engine as INT8" OFF)
option(USE_CNUMPY "Include CNPY libs" OFF) option(USE_CNUMPY "Include CNPY libs" OFF)
option(BUILD_PYTHON_INTERFACE "Build Python Interface" OFF) option(BUILD_PYTHON_INTERFACE "Build Python Interface" OFF)
set(SOLUTION_DIR ${CMAKE_CURRENT_SOURCE_DIR})
message("CMAKE_CURRENT_SOURCE_DIR: " ${SOLUTION_DIR})
if(USE_CNUMPY) if(USE_CNUMPY)
add_definitions(-DUSE_CNUMPY) add_definitions(-DUSE_CNUMPY)
endif() endif()
if(BUILD_INT8) if(BUILD_INT8)
add_definitions(-DBUILD_INT8) add_definitions(-DBUILD_INT8)
message("Build Engine as INT8") message("Build Engine as INT8")
set(INT8_CALIBRATE_DATASET_PATH "/data/Market-1501-v15.09.15/bounding_box_test/" CACHE STRING "Path to calibrate dataset(end with /)")
message("INT8_CALIBRATE_DATASET_PATH: " ${INT8_CALIBRATE_DATASET_PATH})
configure_file(${SOLUTION_DIR}/include/fastrt/config.h.in ${SOLUTION_DIR}/include/fastrt/config.h @ONLY)
elseif(BUILD_FP16) elseif(BUILD_FP16)
add_definitions(-DBUILD_FP16) add_definitions(-DBUILD_FP16)
message("Build Engine as FP16") message("Build Engine as FP16")
@ -48,9 +55,6 @@ else()
message("Build Engine as FP32") message("Build Engine as FP32")
endif() endif()
set(SOLUTION_DIR ${CMAKE_CURRENT_SOURCE_DIR})
message("CMAKE_CURRENT_SOURCE_DIR: " ${SOLUTION_DIR})
if(BUILD_FASTRT_ENGINE) if(BUILD_FASTRT_ENGINE)
add_subdirectory(fastrt) add_subdirectory(fastrt)
message(STATUS "BUILD_FASTREID_ENGINE: ON") message(STATUS "BUILD_FASTREID_ENGINE: ON")
@ -70,4 +74,4 @@ if(BUILD_PYTHON_INTERFACE)
message(STATUS "BUILD_PYTHON_INTERFACE: ON") message(STATUS "BUILD_PYTHON_INTERFACE: ON")
else() else()
message(STATUS "BUILD_PYTHON_INTERFACE: OFF") message(STATUS "BUILD_PYTHON_INTERFACE: OFF")
endif() endif()

View File

@ -48,34 +48,37 @@ So we don't use any parsers here.
``` ```
mkdir build mkdir build
cd build cd build
cmake -DBUILD_FASTRT_ENGINE=ON -DBUILD_DEMO=ON -DBUILD_FP16=ON .. cmake -DBUILD_FASTRT_ENGINE=ON \
-DBUILD_DEMO=ON \
-DBUILD_FP16=ON ..
make make
``` ```
then go to [step 5](#step5) then go to [step 5](#step5)
8. (Optional) You can use INT8 quantization for speed up 8. (Optional) You can use INT8 quantization for speed up
First, modify the source code to specify your calibrate dataset path. In `FastRT/fastrt/meta_arch/model.cpp`, line 91. Prepare a calibration dataset and set its path via the cmake variable `INT8_CALIBRATE_DATASET_PATH`. (The path must end with /)
```
Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(1, w, h, PATH_TO_YOUR_DATASET, "int8calib.table", p);
```
Then build.
``` ```
mkdir build mkdir build
cd build cd build
cmake -DBUILD_FASTRT_ENGINE=ON -DBUILD_DEMO=ON -DBUILD_INT8=ON .. cmake -DBUILD_FASTRT_ENGINE=ON \
-DBUILD_DEMO=ON \
-DBUILD_INT8=ON \
-DINT8_CALIBRATE_DATASET_PATH="/data/Market-1501-v15.09.15/bounding_box_test/" ..
make make
``` ```
then go to [step 5](#step5)
then go to [step 5](#step5)
9. (Optional) Build tensorrt model as shared libs 9. (Optional) Build tensorrt model as shared libs
``` ```
mkdir build mkdir build
cd build cd build
cmake -DBUILD_FASTRT_ENGINE=ON -DBUILD_DEMO=OFF -DBUILD_FP16=ON .. cmake -DBUILD_FASTRT_ENGINE=ON \
-DBUILD_DEMO=OFF \
-DBUILD_FP16=ON ..
make make
make install make install
``` ```
@ -87,31 +90,37 @@ So we don't use any parsers here.
make make
``` ```
then go to [step 5](#step5) then go to [step 5](#step5)
10. (Optional) Build tensorrt model with python interface, then you can use FastRT model in python. 10. (Optional) Build tensorrt model with python interface, then you can use FastRT model in python.
First get the pybind lib, run `git submodule update --init --recursive`.
``` ```
mkdir build mkdir build
cd build cd build
cmake -DBUILD_FASTRT_ENGINE=ON -DBUILD_DEMO=ON -DBUILD_PYTHON_INTERFACE=ON -DPYTHON_EXECUTABLE=$(which python) .. cmake -DBUILD_FASTRT_ENGINE=ON \
make -DBUILD_DEMO=ON \
``` -DBUILD_PYTHON_INTERFACE=ON ..
You should get a so file `FastRT/build/pybind_interface/ReID.cpython-36m-x86_64-linux-gnu.so`. make
```
Then go to [step 5](#step5) to create engine file. After that you can import this so file in python, and deserialize engine file to infer in python. You can find use example in `pybind_interface/test.py` and `pybind_interface/market_benchmark.py`.
``` You should get a so file `FastRT/build/pybind_interface/ReID.cpython-37m-x86_64-linux-gnu.so`.
from PATH_TO_SO_FILE import ReID
model = ReID(GPU_ID)
model.build(PATH_TO_YOUR_ENGINEFILE)
numpy_feature = np.array([model.infer(CV2_FRAME)])
```
Then go to [step 5](#step5) to create the engine file.
After that, you can import this `.so` file in Python and deserialize the engine file to run inference.
You can find usage examples in `pybind_interface/test.py` and `pybind_interface/market_benchmark.py`.
```
from PATH_TO_SO_FILE import ReID
model = ReID(GPU_ID)
model.build(PATH_TO_YOUR_ENGINEFILE)
numpy_feature = np.array([model.infer(CV2_FRAME)])
```
* `pybind_interface/test.py` uses `pybind_interface/docker/trt7cu100/Dockerfile` (without PyTorch installed)
* `pybind_interface/market_benchmark.py` uses `pybind_interface/docker/trt7cu102_torch160/Dockerfile` (with PyTorch installed)
### <a name="ConfigSection"></a>`Tensorrt Model Config` ### <a name="ConfigSection"></a>`Tensorrt Model Config`
Edit `FastRT/demo/inference.cpp`, according to your model config Edit `FastRT/demo/inference.cpp`, according to your model config
@ -124,7 +133,7 @@ static const std::string WEIGHTS_PATH = "../sbs_R50-ibn.wts";
static const std::string ENGINE_PATH = "./sbs_R50-ibn.engine"; static const std::string ENGINE_PATH = "./sbs_R50-ibn.engine";
static const int MAX_BATCH_SIZE = 4; static const int MAX_BATCH_SIZE = 4;
static const int INPUT_H = 256; static const int INPUT_H = 384;
static const int INPUT_W = 128; static const int INPUT_W = 128;
static const int OUTPUT_SIZE = 2048; static const int OUTPUT_SIZE = 2048;
static const int DEVICE_ID = 0; static const int DEVICE_ID = 0;
@ -144,7 +153,7 @@ static const std::string WEIGHTS_PATH = "../sbs_R50.wts";
static const std::string ENGINE_PATH = "./sbs_R50.engine"; static const std::string ENGINE_PATH = "./sbs_R50.engine";
static const int MAX_BATCH_SIZE = 4; static const int MAX_BATCH_SIZE = 4;
static const int INPUT_H = 256; static const int INPUT_H = 384;
static const int INPUT_W = 128; static const int INPUT_W = 128;
static const int OUTPUT_SIZE = 2048; static const int OUTPUT_SIZE = 2048;
static const int DEVICE_ID = 0; static const int DEVICE_ID = 0;
@ -164,7 +173,7 @@ static const std::string WEIGHTS_PATH = "../sbs_r34_distill.wts";
static const std::string ENGINE_PATH = "./sbs_r34_distill.engine"; static const std::string ENGINE_PATH = "./sbs_r34_distill.engine";
static const int MAX_BATCH_SIZE = 4; static const int MAX_BATCH_SIZE = 4;
static const int INPUT_H = 256; static const int INPUT_H = 384;
static const int INPUT_W = 128; static const int INPUT_W = 128;
static const int OUTPUT_SIZE = 512; static const int OUTPUT_SIZE = 512;
static const int DEVICE_ID = 0; static const int DEVICE_ID = 0;
@ -184,7 +193,7 @@ static const std::string WEIGHTS_PATH = "../kd_r34_distill.wts";
static const std::string ENGINE_PATH = "./kd_r34_distill.engine"; static const std::string ENGINE_PATH = "./kd_r34_distill.engine";
static const int MAX_BATCH_SIZE = 4; static const int MAX_BATCH_SIZE = 4;
static const int INPUT_H = 256; static const int INPUT_H = 384;
static const int INPUT_W = 128; static const int INPUT_W = 128;
static const int OUTPUT_SIZE = 512; static const int OUTPUT_SIZE = 512;
static const int DEVICE_ID = 0; static const int DEVICE_ID = 0;

View File

@ -16,7 +16,7 @@ static const std::string WEIGHTS_PATH = "../sbs_R50-ibn.wts";
static const std::string ENGINE_PATH = "./sbs_R50-ibn.engine"; static const std::string ENGINE_PATH = "./sbs_R50-ibn.engine";
static const int MAX_BATCH_SIZE = 4; static const int MAX_BATCH_SIZE = 4;
static const int INPUT_H = 256; static const int INPUT_H = 384;
static const int INPUT_W = 128; static const int INPUT_W = 128;
static const int OUTPUT_SIZE = 2048; static const int OUTPUT_SIZE = 2048;
static const int DEVICE_ID = 0; static const int DEVICE_ID = 0;

View File

@ -1,6 +1,9 @@
#include "fastrt/model.h" #include "fastrt/model.h"
#include "fastrt/calibrator.h" #include "fastrt/calibrator.h"
#ifdef BUILD_INT8
#include "fastrt/config.h"
#endif
namespace fastrt { namespace fastrt {
@ -73,23 +76,17 @@ namespace fastrt {
#if defined(BUILD_FP16) && defined(BUILD_INT8) #if defined(BUILD_FP16) && defined(BUILD_INT8)
std::cout << "Flag confilct! BUILD_FP16 and BUILD_INT8 can't be both True!" << std::endl; std::cout << "Flag confilct! BUILD_FP16 and BUILD_INT8 can't be both True!" << std::endl;
return null; return null;
#endif #endif
#ifdef BUILD_FP16 #if defined(BUILD_FP16)
std::cout << "[Build fp16]" << std::endl; std::cout << "[Build fp16]" << std::endl;
config->setFlag(BuilderFlag::kFP16); config->setFlag(BuilderFlag::kFP16);
#endif #elif defined(BUILD_INT8)
#ifdef BUILD_INT8
std::cout << "[Build int8]" << std::endl; std::cout << "[Build int8]" << std::endl;
std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl; std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
assert(builder->platformHasFastInt8()); TRTASSERT(builder->platformHasFastInt8());
config->setFlag(BuilderFlag::kINT8); config->setFlag(BuilderFlag::kINT8);
int w = _engineCfg.input_w; Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(1, _engineCfg.input_w, _engineCfg.input_h,
int h = _engineCfg.input_h; INT8_CALIBRATE_DATASET_PATH.c_str(), "int8calib.table", _engineCfg.input_name.c_str());
char*p = (char*)_engineCfg.input_name.data();
//path must end with /
Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(1, w, h,
"/data/person_reid/data/Market-1501-v15.09.15/bounding_box_test/", "int8calib.table", p);
config->setInt8Calibrator(calibrator); config->setInt8Calibrator(calibrator);
#endif #endif
auto engine = make_holder(builder->buildEngineWithConfig(*network, *config)); auto engine = make_holder(builder->buildEngineWithConfig(*network, *config));
@ -147,4 +144,4 @@ namespace fastrt {
int Model::getDeviceID() { int Model::getDeviceID() {
return _engineCfg.device_id; return _engineCfg.device_id;
} }
} }

View File

@ -0,0 +1,7 @@
// Header template for the INT8 calibration settings. The build script runs
// configure_file(... ONLY-at-sign mode) on this file to produce fastrt/config.h,
// replacing the INT8_CALIBRATE_DATASET_PATH placeholder (the name wrapped in
// at-signs below) with the CMake cache variable of the same name.
// NOTE: do not spell the at-sign-wrapped placeholder inside comments, or it
// will be substituted there as well.
#pragma once
#ifdef BUILD_INT8
#include <string>
// Directory containing the INT8 calibration images. The CMake cache entry
// documents that the value should end with '/'.
const std::string INT8_CALIBRATE_DATASET_PATH = "@INT8_CALIBRATE_DATASET_PATH@";
#endif

View File

@ -1,7 +1,7 @@
SET(APP_PROJECT_NAME ReID) SET(APP_PROJECT_NAME ReID)
# pybind # pybind
add_subdirectory(pybind11) find_package(pybind11)
find_package(CUDA REQUIRED) find_package(CUDA REQUIRED)
# include and link dirs of cuda and tensorrt, you need adapt them if yours are different # include and link dirs of cuda and tensorrt, you need adapt them if yours are different

View File

@ -1,17 +1,39 @@
# cuda10.0 # cuda10.0
FROM fineyu/tensorrt7:0.0.1 FROM fineyu/tensorrt7:0.0.1
ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y \
build-essential \
RUN apt-get update && apt-get install -y software-properties-common software-properties-common \
cmake \
wget \
python3.7-dev python3-pip
RUN add-apt-repository -y ppa:timsc/opencv-3.4 && \ RUN add-apt-repository -y ppa:timsc/opencv-3.4 && \
apt-get update && \ apt-get update && \
apt-get install -y cmake \ apt-get install -y \
libopencv-dev \ libopencv-dev \
libopencv-dnn-dev \ libopencv-dnn-dev \
libopencv-shape3.4-dbg && \ libopencv-shape3.4-dbg && \
rm -rf /var/lib/apt/lists/* apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && python3 get-pip.py && rm get-pip.py && pip3 install torch==1.6.0 torchvision tensorboard matplotlib scipy Pillow numpy prettytable easydict opencv-python \ RUN wget https://bootstrap.pypa.io/get-pip.py && \
scikit-learn pyyaml yacs termcolor tabulate tensorboard opencv-python pyyaml yacs termcolor tabulate gdown faiss-cpu python3 get-pip.py --force-reinstall && \
rm get-pip.py
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.7 1 && \
update-alternatives --set python3 /usr/bin/python3.7
RUN pip install pytest opencv-python
RUN cd /usr/local/src && \
wget https://github.com/pybind/pybind11/archive/v2.2.3.tar.gz && \
tar xvf v2.2.3.tar.gz && \
cd pybind11-2.2.3 && \
mkdir build && \
cd build && \
cmake .. && \
make -j12 && \
make install && \
cd ../.. && \
rm -rf pybind11-2.2.3 && \
rm -rf v2.2.3.tar.gz

View File

@ -1,17 +0,0 @@
# cuda10.2
FROM nvcr.io/nvidia/tensorrt:20.03-py3
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y software-properties-common
RUN add-apt-repository -y ppa:timsc/opencv-3.4 && \
apt-get update && \
apt-get install -y cmake \
libopencv-dev \
libopencv-dnn-dev \
libopencv-shape3.4-dbg && \
rm -rf /var/lib/apt/lists/*
RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && python3 get-pip.py && rm get-pip.py && pip3 install torch==1.6.0 torchvision tensorboard matplotlib scipy Pillow numpy prettytable easydict opencv-python \
scikit-learn pyyaml yacs termcolor tabulate tensorboard opencv-python pyyaml yacs termcolor tabulate gdown faiss-cpu

View File

@ -0,0 +1,10 @@
# cuda10.2
# Base image; its name suggests TensorRT 7 + CUDA 10.2 + OpenCV 3.4 with pybind11 preinstalled — TODO confirm.
FROM darrenhsieh1717/trt7-cu102-cv34:pybind
# Install PyTorch pinned to 1.6.0 together with torchvision 0.7.0.
RUN pip install torch==1.6.0 torchvision==0.7.0
# Additional Python packages (all unpinned); presumably needed by the fastreid-based benchmark script — verify.
RUN pip install opencv-python tensorboard cython yacs termcolor scikit-learn tabulate gdown gpustat ipdb h5py fs faiss-gpu
# Build and install NVIDIA apex from source. No tag or commit is given, so the
# clone takes whatever the repository's default branch points to at build time.
RUN git clone https://github.com/NVIDIA/apex && \
cd apex && \
python3 setup.py install

View File

@ -23,7 +23,7 @@ from fastreid.evaluation.rank import eval_market1501
from build.pybind_interface.ReID import ReID from build.pybind_interface.ReID import ReID
FEATURE_DIM = 512 FEATURE_DIM = 2048
GPU_ID = 0 GPU_ID = 0
def map(wrapper): def map(wrapper):
@ -61,5 +61,5 @@ def map(wrapper):
if __name__ == '__main__': if __name__ == '__main__':
infer = ReID(GPU_ID) infer = ReID(GPU_ID)
infer.build("../build/kd_r18_distill.engine") infer.build("../build/sbs_R50-ibn.engine")
map(infer) map(infer)

@ -1 +0,0 @@
Subproject commit 0e01c243c7ffae3a2e52f998bacfe82f56aa96d9

View File

@ -18,7 +18,7 @@ static const std::string WEIGHTS_PATH = "../sbs_R50-ibn.wts";
static const std::string ENGINE_PATH = "./sbs_R50-ibn.engine"; static const std::string ENGINE_PATH = "./sbs_R50-ibn.engine";
static const int MAX_BATCH_SIZE = 4; static const int MAX_BATCH_SIZE = 4;
static const int INPUT_H = 256; static const int INPUT_H = 384;
static const int INPUT_W = 128; static const int INPUT_W = 128;
static const int OUTPUT_SIZE = 2048; static const int OUTPUT_SIZE = 2048;
static const int DEVICE_ID = 0; static const int DEVICE_ID = 0;

View File

@ -7,12 +7,12 @@ import time
if __name__ == '__main__': if __name__ == '__main__':
iter_ = 20000 iter_ = 10
m = ReID(0) m = ReID(0)
m.build("../build/kd_r18_distill.engine") m.build("../build/sbs_R50-ibn.engine")
print("build done") print("build done")
frame = cv2.imread("/data/sunp/algorithm/2020_1015_time/pytorchtotensorrt_reid/test/query/0001/0001_c1s1_001051_00.jpg") frame = cv2.imread("../data/Market-1501-v15.09.15/calib_set/-1_c1s2_009916_03.jpg")
m.infer(frame) m.infer(frame)
t0 = time.time() t0 = time.time()