diff --git a/.gitmodules b/.gitmodules
deleted file mode 100644
index 1eb4b19..0000000
--- a/.gitmodules
+++ /dev/null
@@ -1,3 +0,0 @@
-[submodule "projects/FastRT/pybind_interface/pybind11"]
-	path = projects/FastRT/pybind_interface/pybind11
-	url = https://github.com/pybind/pybind11.git
diff --git a/projects/FastRT/CMakeLists.txt b/projects/FastRT/CMakeLists.txt
index 7f5074e..31ba95d 100755
--- a/projects/FastRT/CMakeLists.txt
+++ b/projects/FastRT/CMakeLists.txt
@@ -4,7 +4,7 @@ set(LIBARARY_NAME "FastRT" CACHE STRING "The Fastreid-tensorrt library name")
 
 set(LIBARARY_VERSION_MAJOR "0")
 set(LIBARARY_VERSION_MINOR "0")
-set(LIBARARY_VERSION_SINOR "4")
+set(LIBARARY_VERSION_SINOR "5")
 set(LIBARARY_SOVERSION "0")
 set(LIBARARY_VERSION "${LIBARARY_VERSION_MAJOR}.${LIBARARY_VERSION_MINOR}.${LIBARARY_VERSION_SINOR}")
 project(${LIBARARY_NAME}${LIBARARY_VERSION})
@@ -35,12 +35,19 @@ option(BUILD_INT8              "Build Engine as INT8"     OFF)
 option(USE_CNUMPY              "Include CNPY libs"        OFF)
 option(BUILD_PYTHON_INTERFACE  "Build Python Interface"   OFF)
 
+set(SOLUTION_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+message("CMAKE_CURRENT_SOURCE_DIR: " ${SOLUTION_DIR})
+
 if(USE_CNUMPY)
   add_definitions(-DUSE_CNUMPY)
 endif()
+
 if(BUILD_INT8)
   add_definitions(-DBUILD_INT8)
   message("Build Engine as INT8")
+  set(INT8_CALIBRATE_DATASET_PATH "/data/Market-1501-v15.09.15/bounding_box_test/" CACHE STRING "Path to the calibration dataset (must end with /)")
+  message("INT8_CALIBRATE_DATASET_PATH: " ${INT8_CALIBRATE_DATASET_PATH})
+  configure_file(${SOLUTION_DIR}/include/fastrt/config.h.in ${SOLUTION_DIR}/include/fastrt/config.h @ONLY)
 elseif(BUILD_FP16)
   add_definitions(-DBUILD_FP16)
   message("Build Engine as FP16")
@@ -48,9 +55,6 @@ else()
   message("Build Engine as FP32")
 endif()
 
-set(SOLUTION_DIR ${CMAKE_CURRENT_SOURCE_DIR})
-message("CMAKE_CURRENT_SOURCE_DIR: " ${SOLUTION_DIR})
-
 if(BUILD_FASTRT_ENGINE)
   add_subdirectory(fastrt)
   message(STATUS "BUILD_FASTREID_ENGINE: ON")
@@ -70,4 +74,4 @@ if(BUILD_PYTHON_INTERFACE)
   message(STATUS "BUILD_PYTHON_INTERFACE: ON")
 else()
   message(STATUS "BUILD_PYTHON_INTERFACE: OFF")
-endif()
+endif()
\ No newline at end of file
diff --git a/projects/FastRT/README.md b/projects/FastRT/README.md
index 6188d66..23b3ea1 100755
--- a/projects/FastRT/README.md
+++ b/projects/FastRT/README.md
@@ -48,34 +48,37 @@ So we don't use any parsers here.
    ``` 
    mkdir build
    cd build
-   cmake -DBUILD_FASTRT_ENGINE=ON -DBUILD_DEMO=ON -DBUILD_FP16=ON ..
+   cmake -DBUILD_FASTRT_ENGINE=ON \
+         -DBUILD_DEMO=ON \
+         -DBUILD_FP16=ON ..
    make
    ```
    
-   then go to [step 5](#step5)  
+   then go to [step 5](#step5) 
 
 8. (Optional) You can use INT8 quantization for speed up
 
-   First, modify the source code to specify your calibrate dataset path. In `FastRT/fastrt/meta_arch/model.cpp`, line 91.
-   ```
-   Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(1, w, h, PATH_TO_YOUR_DATASET, "int8calib.table", p);
-   ```
-   Then build.
+   Prepare the calibration dataset and pass its path to CMake (the path must end with `/`); it is baked into `include/fastrt/config.h` at configure time.
+
    ``` 
    mkdir build
    cd build
-   cmake -DBUILD_FASTRT_ENGINE=ON -DBUILD_DEMO=ON -DBUILD_INT8=ON ..
+   cmake -DBUILD_FASTRT_ENGINE=ON \
+         -DBUILD_DEMO=ON \
+         -DBUILD_INT8=ON \
+         -DINT8_CALIBRATE_DATASET_PATH="/data/Market-1501-v15.09.15/bounding_box_test/" ..
    make
    ```
-   
-   then go to [step 5](#step5)  
+   then go to [step 5](#step5)
 
 9. (Optional) Build tensorrt model as shared libs
 
    ``` 
    mkdir build
    cd build
-   cmake -DBUILD_FASTRT_ENGINE=ON -DBUILD_DEMO=OFF -DBUILD_FP16=ON ..
+   cmake -DBUILD_FASTRT_ENGINE=ON \
+         -DBUILD_DEMO=OFF \
+         -DBUILD_FP16=ON ..
    make
    make install
    ```
@@ -87,31 +90,37 @@ So we don't use any parsers here.
    make
    ```
 
-   then go to [step 5](#step5)  
+   then go to [step 5](#step5)
    
 10. (Optional) Build tensorrt model with python interface, then you can use FastRT model in python.
-   First get the pybind lib, run `git submodule update --init --recursive`.
 
-   ``` 
-   mkdir build
-   cd build
-   cmake -DBUILD_FASTRT_ENGINE=ON -DBUILD_DEMO=ON -DBUILD_PYTHON_INTERFACE=ON -DPYTHON_EXECUTABLE=$(which python) ..
-   make
-   ```
-   You should get a so file `FastRT/build/pybind_interface/ReID.cpython-36m-x86_64-linux-gnu.so`. 
-   
-   Then go to [step 5](#step5) to create engine file. After that you can import this so file in python, and deserialize engine file to infer in python. You can find use example in `pybind_interface/test.py` and `pybind_interface/market_benchmark.py`.
-   ``` 
-   from PATH_TO_SO_FILE import ReID
-   model = ReID(GPU_ID)
-   model.build(PATH_TO_YOUR_ENGINEFILE)
-   numpy_feature = np.array([model.infer(CV2_FRAME)])
-   ```
-   
+    ``` 
+    mkdir build
+    cd build
+    cmake -DBUILD_FASTRT_ENGINE=ON \
+        -DBUILD_DEMO=ON \
+        -DBUILD_PYTHON_INTERFACE=ON ..
+    make
+    ```
+    
+    You should get a shared object file `FastRT/build/pybind_interface/ReID.cpython-37m-x86_64-linux-gnu.so`.
    
+    Then go to [step 5](#step5) to create the engine file.
 
-   
+    After that, you can import the shared object in Python and deserialize the engine file to run inference from Python.
 
+    You can find usage examples in `pybind_interface/test.py` and `pybind_interface/market_benchmark.py`.
+    
+    ``` 
+    from PATH_TO_SO_FILE import ReID
+    model = ReID(GPU_ID)
+    model.build(PATH_TO_YOUR_ENGINEFILE)
+    numpy_feature = np.array([model.infer(CV2_FRAME)])
+    ```
+    
+    * `pybind_interface/test.py` uses `pybind_interface/docker/trt7cu100/Dockerfile` (without PyTorch installed)
+    * `pybind_interface/market_benchmark.py` uses `pybind_interface/docker/trt7cu102_torch160/Dockerfile` (with PyTorch installed)
+    
 ### <a name="ConfigSection"></a>`Tensorrt Model Config`
 
 Edit `FastRT/demo/inference.cpp`, according to your model config
@@ -124,7 +133,7 @@ static const std::string WEIGHTS_PATH = "../sbs_R50-ibn.wts";
 static const std::string ENGINE_PATH = "./sbs_R50-ibn.engine";
 
 static const int MAX_BATCH_SIZE = 4;
-static const int INPUT_H = 256;
+static const int INPUT_H = 384;
 static const int INPUT_W = 128;
 static const int OUTPUT_SIZE = 2048;
 static const int DEVICE_ID = 0;
@@ -144,7 +153,7 @@ static const std::string WEIGHTS_PATH = "../sbs_R50.wts";
 static const std::string ENGINE_PATH = "./sbs_R50.engine"; 
 
 static const int MAX_BATCH_SIZE = 4;
-static const int INPUT_H = 256;
+static const int INPUT_H = 384;
 static const int INPUT_W = 128;
 static const int OUTPUT_SIZE = 2048;
 static const int DEVICE_ID = 0;
@@ -164,7 +173,7 @@ static const std::string WEIGHTS_PATH = "../sbs_r34_distill.wts";
 static const std::string ENGINE_PATH = "./sbs_r34_distill.engine";
 
 static const int MAX_BATCH_SIZE = 4;
-static const int INPUT_H = 256;
+static const int INPUT_H = 384;
 static const int INPUT_W = 128;
 static const int OUTPUT_SIZE = 512;
 static const int DEVICE_ID = 0;
@@ -184,7 +193,7 @@ static const std::string WEIGHTS_PATH = "../kd_r34_distill.wts";
 static const std::string ENGINE_PATH = "./kd_r34_distill.engine"; 
 
 static const int MAX_BATCH_SIZE = 4;
-static const int INPUT_H = 256;
+static const int INPUT_H = 384;
 static const int INPUT_W = 128;
 static const int OUTPUT_SIZE = 512;
 static const int DEVICE_ID = 0;
diff --git a/projects/FastRT/demo/inference.cpp b/projects/FastRT/demo/inference.cpp
index 3fd2650..bff73ce 100755
--- a/projects/FastRT/demo/inference.cpp
+++ b/projects/FastRT/demo/inference.cpp
@@ -16,7 +16,7 @@ static const std::string WEIGHTS_PATH = "../sbs_R50-ibn.wts";
 static const std::string ENGINE_PATH = "./sbs_R50-ibn.engine";
 
 static const int MAX_BATCH_SIZE = 4;
-static const int INPUT_H = 256;
+static const int INPUT_H = 384;
 static const int INPUT_W = 128;
 static const int OUTPUT_SIZE = 2048;
 static const int DEVICE_ID = 0;
diff --git a/projects/FastRT/fastrt/meta_arch/model.cpp b/projects/FastRT/fastrt/meta_arch/model.cpp
index 73476ba..98acb15 100755
--- a/projects/FastRT/fastrt/meta_arch/model.cpp
+++ b/projects/FastRT/fastrt/meta_arch/model.cpp
@@ -1,6 +1,9 @@
 #include "fastrt/model.h"
 #include "fastrt/calibrator.h"
 
+#ifdef BUILD_INT8
+#include "fastrt/config.h"
+#endif 
 
 namespace fastrt {
 
@@ -73,23 +76,17 @@ namespace fastrt {
 #if defined(BUILD_FP16) && defined(BUILD_INT8)
         std::cout << "Flag confilct! BUILD_FP16 and BUILD_INT8 can't be both True!" << std::endl;
         return null;
-#endif
-#ifdef BUILD_FP16
+#endif 
+#if defined(BUILD_FP16)
         std::cout << "[Build fp16]" << std::endl;
         config->setFlag(BuilderFlag::kFP16);
-#endif 
-#ifdef BUILD_INT8
+#elif defined(BUILD_INT8)
         std::cout << "[Build int8]" << std::endl;
         std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
-        assert(builder->platformHasFastInt8());
+        TRTASSERT(builder->platformHasFastInt8());
         config->setFlag(BuilderFlag::kINT8);
-        int w = _engineCfg.input_w;
-        int h = _engineCfg.input_h;
-        char*p = (char*)_engineCfg.input_name.data();
-
-        //path must end with /
-        Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(1, w, h, 
-            "/data/person_reid/data/Market-1501-v15.09.15/bounding_box_test/", "int8calib.table", p);
+        Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(1, _engineCfg.input_w, _engineCfg.input_h, 
+            INT8_CALIBRATE_DATASET_PATH.c_str(), "int8calib.table", _engineCfg.input_name.c_str());
         config->setInt8Calibrator(calibrator);
 #endif 
         auto engine = make_holder(builder->buildEngineWithConfig(*network, *config));
@@ -147,4 +144,4 @@ namespace fastrt {
     int Model::getDeviceID() { 
         return _engineCfg.device_id; 
     }
-}
+}
\ No newline at end of file
diff --git a/projects/FastRT/include/fastrt/config.h.in b/projects/FastRT/include/fastrt/config.h.in
new file mode 100755
index 0000000..2dd2830
--- /dev/null
+++ b/projects/FastRT/include/fastrt/config.h.in
@@ -0,0 +1,7 @@
+#pragma once
+
+#ifdef BUILD_INT8
+#include <string>
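+// @INT8_CALIBRATE_DATASET_PATH@ is replaced by CMake's configure_file() with the
+// INT8_CALIBRATE_DATASET_PATH cache variable (the path must end with '/').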
+const std::string INT8_CALIBRATE_DATASET_PATH = "@INT8_CALIBRATE_DATASET_PATH@";
+#endif
+
diff --git a/projects/FastRT/pybind_interface/CMakeLists.txt b/projects/FastRT/pybind_interface/CMakeLists.txt
index eb92995..bc7204c 100755
--- a/projects/FastRT/pybind_interface/CMakeLists.txt
+++ b/projects/FastRT/pybind_interface/CMakeLists.txt
@@ -1,7 +1,7 @@
 SET(APP_PROJECT_NAME ReID)
 
 # pybind
-add_subdirectory(pybind11)
+find_package(pybind11)
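+# pybind11 is expected to be installed system-wide (the provided docker images build it from source)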
 
 find_package(CUDA REQUIRED)
 # include and link dirs of cuda and tensorrt, you need adapt them if yours are different
diff --git a/projects/FastRT/pybind_interface/docker/trt7cu100/Dockerfile b/projects/FastRT/pybind_interface/docker/trt7cu100/Dockerfile
old mode 100755
new mode 100644
index 7672397..5fdc424
--- a/projects/FastRT/pybind_interface/docker/trt7cu100/Dockerfile
+++ b/projects/FastRT/pybind_interface/docker/trt7cu100/Dockerfile
@@ -1,17 +1,39 @@
 # cuda10.0
 FROM fineyu/tensorrt7:0.0.1
 
-ENV DEBIAN_FRONTEND=noninteractive
-
-RUN apt-get update && apt-get install -y software-properties-common
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    software-properties-common \
+    cmake \
+    wget \
+    python3.7-dev python3-pip 
 
 RUN add-apt-repository -y ppa:timsc/opencv-3.4 && \
-apt-get update && \
-apt-get install -y cmake \
-libopencv-dev \
-libopencv-dnn-dev \
-libopencv-shape3.4-dbg && \
-rm -rf /var/lib/apt/lists/*
+    apt-get update && \
+    apt-get install -y \
+    libopencv-dev \
+    libopencv-dnn-dev \
+    libopencv-shape3.4-dbg && \
+    apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 
-RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && python3 get-pip.py && rm get-pip.py && pip3 install torch==1.6.0 torchvision tensorboard matplotlib scipy Pillow numpy prettytable easydict opencv-python \
-scikit-learn pyyaml yacs termcolor tabulate tensorboard opencv-python pyyaml yacs termcolor tabulate gdown faiss-cpu
+RUN wget https://bootstrap.pypa.io/get-pip.py && \
+    python3 get-pip.py --force-reinstall && \
+    rm get-pip.py
+
+RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.7 1 && \
+    update-alternatives --set python3 /usr/bin/python3.7
+
+RUN pip install pytest opencv-python 
+
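+# build and install pybind11 from source (replaces the former git submodule)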
+RUN cd /usr/local/src && \
+    wget https://github.com/pybind/pybind11/archive/v2.2.3.tar.gz && \
+    tar xvf v2.2.3.tar.gz && \
+    cd pybind11-2.2.3 && \
+    mkdir build && \
+    cd build && \
+    cmake .. && \
+    make -j12 && \
+    make install && \
+    cd ../.. && \
+    rm -rf pybind11-2.2.3 && \
+    rm -rf v2.2.3.tar.gz
diff --git a/projects/FastRT/pybind_interface/docker/trt7cu102/Dockerfile b/projects/FastRT/pybind_interface/docker/trt7cu102/Dockerfile
deleted file mode 100755
index edec2ae..0000000
--- a/projects/FastRT/pybind_interface/docker/trt7cu102/Dockerfile
+++ /dev/null
@@ -1,17 +0,0 @@
-# cuda10.2
-FROM nvcr.io/nvidia/tensorrt:20.03-py3
-
-ENV DEBIAN_FRONTEND=noninteractive
-
-RUN apt-get update && apt-get install -y software-properties-common
-
-RUN add-apt-repository -y ppa:timsc/opencv-3.4 && \
-apt-get update && \
-apt-get install -y cmake \
-libopencv-dev \
-libopencv-dnn-dev \
-libopencv-shape3.4-dbg && \
-rm -rf /var/lib/apt/lists/*
-
-RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && python3 get-pip.py && rm get-pip.py && pip3 install torch==1.6.0 torchvision tensorboard matplotlib scipy Pillow numpy prettytable easydict opencv-python \
-scikit-learn pyyaml yacs termcolor tabulate tensorboard opencv-python pyyaml yacs termcolor tabulate gdown faiss-cpu
diff --git a/projects/FastRT/pybind_interface/docker/trt7cu102_torch160/Dockerfile b/projects/FastRT/pybind_interface/docker/trt7cu102_torch160/Dockerfile
new file mode 100755
index 0000000..f8b84d7
--- /dev/null
+++ b/projects/FastRT/pybind_interface/docker/trt7cu102_torch160/Dockerfile
@@ -0,0 +1,10 @@
+# cuda10.2
+FROM darrenhsieh1717/trt7-cu102-cv34:pybind
+
+RUN pip install torch==1.6.0 torchvision==0.7.0
+
+RUN pip install opencv-python tensorboard cython yacs termcolor scikit-learn tabulate gdown gpustat ipdb h5py fs faiss-gpu
+
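+# build NVIDIA apex from source (fastreid can use it for mixed precision)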
+RUN git clone https://github.com/NVIDIA/apex && \
+    cd apex && \
+    python3 setup.py install
diff --git a/projects/FastRT/pybind_interface/market_benchmark.py b/projects/FastRT/pybind_interface/market_benchmark.py
index 52f4ace..51c9caf 100755
--- a/projects/FastRT/pybind_interface/market_benchmark.py
+++ b/projects/FastRT/pybind_interface/market_benchmark.py
@@ -23,7 +23,7 @@ from fastreid.evaluation.rank import eval_market1501
 from build.pybind_interface.ReID import ReID
 
 
-FEATURE_DIM = 512
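+# sbs_R50-ibn outputs 2048-dim features (matches OUTPUT_SIZE in the model config)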
+FEATURE_DIM = 2048
 GPU_ID = 0
 
 def map(wrapper):
@@ -61,5 +61,5 @@ def map(wrapper):
 
 if __name__ == '__main__':
 	infer = ReID(GPU_ID)
-	infer.build("../build/kd_r18_distill.engine")
+	infer.build("../build/sbs_R50-ibn.engine")
 	map(infer)
diff --git a/projects/FastRT/pybind_interface/pybind11 b/projects/FastRT/pybind_interface/pybind11
deleted file mode 160000
index 0e01c24..0000000
--- a/projects/FastRT/pybind_interface/pybind11
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 0e01c243c7ffae3a2e52f998bacfe82f56aa96d9
diff --git a/projects/FastRT/pybind_interface/reid.cpp b/projects/FastRT/pybind_interface/reid.cpp
index 1156943..17a4fac 100755
--- a/projects/FastRT/pybind_interface/reid.cpp
+++ b/projects/FastRT/pybind_interface/reid.cpp
@@ -18,7 +18,7 @@ static const std::string WEIGHTS_PATH = "../sbs_R50-ibn.wts";
 static const std::string ENGINE_PATH = "./sbs_R50-ibn.engine";
 
 static const int MAX_BATCH_SIZE = 4;
-static const int INPUT_H = 256;
+static const int INPUT_H = 384;
 static const int INPUT_W = 128;
 static const int OUTPUT_SIZE = 2048;
 static const int DEVICE_ID = 0;
diff --git a/projects/FastRT/pybind_interface/test.py b/projects/FastRT/pybind_interface/test.py
index a1a1db3..a694b1a 100755
--- a/projects/FastRT/pybind_interface/test.py
+++ b/projects/FastRT/pybind_interface/test.py
@@ -7,12 +7,12 @@ import time
 
 
 if __name__ == '__main__':
-    iter_ = 20000
+    iter_ = 10
     m = ReID(0)
-    m.build("../build/kd_r18_distill.engine")
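+    # deserialize a prebuilt TensorRT engine (see README step 5 for how to create it)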
+    m.build("../build/sbs_R50-ibn.engine")
     print("build done")
     
-    frame = cv2.imread("/data/sunp/algorithm/2020_1015_time/pytorchtotensorrt_reid/test/query/0001/0001_c1s1_001051_00.jpg")
+    frame = cv2.imread("../data/Market-1501-v15.09.15/calib_set/-1_c1s2_009916_03.jpg")
     m.infer(frame)
     t0 = time.time()