[Feature] Sync mmaction2-sdk(master) to dev1.x ()

* sync mmaction sdk

* pipeline.json

* fix docs

* replace topk when make regression for mmaction2

* add python api

* add missing file

* add missing test file

* remove cudnn dep for formatshape

* add sample arg for input
pull/1362/head
Chen Xin 2022-11-10 15:13:24 +08:00 committed by GitHub
parent 180500d76d
commit ccc21289d1
46 changed files with 2678 additions and 35 deletions

View File

@ -0,0 +1,190 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "video_recognizer.h"
#include <numeric>
#include <vector>
#include "common_internal.h"
#include "executor_internal.h"
#include "mmdeploy/archive/value_archive.h"
#include "mmdeploy/codebase/mmaction/mmaction.h"
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/mat.h"
#include "mmdeploy/core/model.h"
#include "mmdeploy/core/status_code.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/core/value.h"
#include "model.h"
#include "pipeline.h"
using namespace mmdeploy;
namespace {
Value config_template(const Model& model) {
// clang-format off
return {
{"type", "Pipeline"},
{"input", {"video"}},
{
"tasks", {
{
{"name", "Video Recognizer"},
{"type", "Inference"},
{"input", "video"},
{"output", "label"},
{"params", {{"model", std::move(model)}}},
}
}
},
{"output", "label"},
};
// clang-format on
}
} // namespace
int mmdeploy_video_recognizer_create(mmdeploy_model_t model, const char* device_name, int device_id,
mmdeploy_video_recognizer_t* recognizer) {
mmdeploy_context_t context{};
auto ec = mmdeploy_context_create_by_device(device_name, device_id, &context);
if (ec != MMDEPLOY_SUCCESS) {
return ec;
}
ec = mmdeploy_video_recognizer_create_v2(model, context, recognizer);
mmdeploy_context_destroy(context);
return ec;
}
int mmdeploy_video_recognizer_create_by_path(const char* model_path, const char* device_name,
int device_id,
mmdeploy_video_recognizer_t* recognizer) {
mmdeploy_model_t model{};
if (auto ec = mmdeploy_model_create_by_path(model_path, &model)) {
return ec;
}
auto ec = mmdeploy_video_recognizer_create(model, device_name, device_id, recognizer);
mmdeploy_model_destroy(model);
return ec;
}
int mmdeploy_video_recognizer_apply(mmdeploy_video_recognizer_t recognizer,
const mmdeploy_mat_t* images,
const mmdeploy_video_sample_info_t* video_info, int video_count,
mmdeploy_video_recognition_t** results, int** result_count) {
wrapped<mmdeploy_value_t> input;
if (auto ec =
mmdeploy_video_recognizer_create_input(images, video_info, video_count, input.ptr())) {
return ec;
}
wrapped<mmdeploy_value_t> output;
if (auto ec = mmdeploy_video_recognizer_apply_v2(recognizer, input, output.ptr())) {
return ec;
}
if (auto ec = mmdeploy_video_recognizer_get_result(output, results, result_count)) {
return ec;
}
return MMDEPLOY_SUCCESS;
}
void mmdeploy_video_recognizer_release_result(mmdeploy_video_recognition_t* results,
int* result_count, int video_count) {
delete[] results;
delete[] result_count;
}
void mmdeploy_video_recognizer_destroy(mmdeploy_video_recognizer_t recognizer) {
mmdeploy_pipeline_destroy((mmdeploy_pipeline_t)recognizer);
}
int mmdeploy_video_recognizer_create_v2(mmdeploy_model_t model, mmdeploy_context_t context,
mmdeploy_video_recognizer_t* recognizer) {
auto config = config_template(*Cast(model));
return mmdeploy_pipeline_create_v3(Cast(&config), context, (mmdeploy_pipeline_t*)recognizer);
}
int mmdeploy_video_recognizer_create_input(const mmdeploy_mat_t* images,
const mmdeploy_video_sample_info_t* video_info,
int video_count, mmdeploy_value_t* value) {
if (video_count && (images == nullptr || video_info == nullptr)) {
return MMDEPLOY_E_INVALID_ARG;
}
try {
auto input = std::make_unique<Value>(Value{Value::kArray});
auto sample = std::make_unique<Value>(Value::kArray);
for (int i = 0; i < video_count; ++i) {
int clip_len = video_info[i].clip_len;
int num_clips = video_info[i].num_clips;
int n_mat = clip_len * num_clips;
for (int j = 0; j < n_mat; j++) {
mmdeploy::Mat _mat{images[j].height,
images[j].width,
PixelFormat(images[j].format),
DataType(images[j].type),
images[j].data,
images[j].device ? *(const Device*)(images[j].device) : Device{0}};
sample->push_back({{"ori_img", _mat}, {"clip_len", clip_len}, {"num_clips", num_clips}});
}
input->front().push_back(std::move(*sample.release()));
}
*value = Cast(input.release());
return MMDEPLOY_SUCCESS;
} catch (const std::exception& e) {
MMDEPLOY_ERROR("unhandled exception: {}", e.what());
} catch (...) {
MMDEPLOY_ERROR("unknown exception caught");
}
return MMDEPLOY_E_FAIL;
}
int mmdeploy_video_recognizer_apply_v2(mmdeploy_video_recognizer_t recognizer,
mmdeploy_value_t input, mmdeploy_value_t* output) {
return mmdeploy_pipeline_apply((mmdeploy_pipeline_t)recognizer, input, output);
}
int mmdeploy_video_recognizer_get_result(mmdeploy_value_t output,
mmdeploy_video_recognition_t** results,
int** result_count) {
if (!output || !results || !result_count) {
return MMDEPLOY_E_INVALID_ARG;
}
try {
Value& value = Cast(output)->front();
auto classify_outputs = from_value<std::vector<mmaction::Labels>>(value);
std::vector<int> _result_count;
_result_count.reserve(classify_outputs.size());
for (const auto& cls_output : classify_outputs) {
_result_count.push_back((int)cls_output.size());
}
auto total = std::accumulate(begin(_result_count), end(_result_count), 0);
std::unique_ptr<int[]> result_count_data(new int[_result_count.size()]{});
std::copy(_result_count.begin(), _result_count.end(), result_count_data.get());
std::unique_ptr<mmdeploy_video_recognition_t[]> result_data(
new mmdeploy_video_recognition_t[total]{});
auto result_ptr = result_data.get();
for (const auto& cls_output : classify_outputs) {
for (const auto& label : cls_output) {
result_ptr->label_id = label.label_id;
result_ptr->score = label.score;
++result_ptr;
}
}
*result_count = result_count_data.release();
*results = result_data.release();
return MMDEPLOY_SUCCESS;
} catch (const std::exception& e) {
MMDEPLOY_ERROR("unhandled exception: {}", e.what());
} catch (...) {
MMDEPLOY_ERROR("unknown exception caught");
}
return MMDEPLOY_E_FAIL;
}

View File

@ -0,0 +1,139 @@
// Copyright (c) OpenMMLab. All rights reserved.
/**
* @file video_recognizer.h
* @brief Interface to MMACTION video recognition task
*/
#ifndef MMDEPLOY_VIDEO_RECOGNIZER_H
#define MMDEPLOY_VIDEO_RECOGNIZER_H
#include "common.h"
#include "executor.h"
#include "model.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef struct mmdeploy_video_recognition_t {
int label_id;
float score;
} mmdeploy_video_recognition_t;
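/** @brief Frame layout of one input video: the recognizer expects clip_len * num_clips frames per video */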
typedef struct mmdeploy_video_sample_info_t {
int clip_len;
int num_clips;
} mmdeploy_video_sample_info_t;
typedef struct mmdeploy_video_recognizer* mmdeploy_video_recognizer_t;
/**
* @brief Create video recognizer's handle
* @param[in] model an instance of mmaction sdk model created by
* \ref mmdeploy_model_create_by_path or \ref mmdeploy_model_create in \ref model.h
* @param[in] device_name name of device, such as "cpu", "cuda", etc.
* @param[in] device_id id of device.
* @param[out] recognizer handle of the created video recognizer, which must be destroyed
* by \ref mmdeploy_video_recognizer_destroy
* @return status of creating video recognizer's handle
*/
MMDEPLOY_API int mmdeploy_video_recognizer_create(mmdeploy_model_t model, const char* device_name,
int device_id,
mmdeploy_video_recognizer_t* recognizer);
/**
* @brief Create a video recognizer instance
* @param[in] model_path path to video recognition model
* @param[in] device_name name of device, such as "cpu", "cuda", etc.
* @param[in] device_id id of device.
* @param[out] recognizer handle of the created video recognizer, which must be destroyed
* by \ref mmdeploy_video_recognizer_destroy
* @return status code of the operation
*/
MMDEPLOY_API int mmdeploy_video_recognizer_create_by_path(const char* model_path,
const char* device_name, int device_id,
mmdeploy_video_recognizer_t* recognizer);
/**
* @brief Apply video recognizer to a batch of videos
* @param[in] recognizer video recognizer's handle created by \ref
* mmdeploy_video_recognizer_create_by_path
* @param[in] images a batch of videos
* @param[in] video_info video information of each video
* @param[in] video_count number of videos
* @param[out] results a linear buffer that holds the recognition results. It must be released
* by \ref mmdeploy_video_recognizer_release_result
* @param[out] result_count a linear buffer of length \p video_count that saves the number of
* recognition results of each video. It must be released by \ref
* mmdeploy_video_recognizer_release_result
* @return status code of the operation
*/
MMDEPLOY_API int mmdeploy_video_recognizer_apply(mmdeploy_video_recognizer_t recognizer,
const mmdeploy_mat_t* images,
const mmdeploy_video_sample_info_t* video_info,
int video_count,
mmdeploy_video_recognition_t** results,
int** result_count);
/** @brief Release result buffer returned by \ref mmdeploy_video_recognizer_apply
* @param[in] results result buffer returned by the video recognizer
* @param[in] result_count buffer holding the number of results for each video
* @param[in] video_count length of \p result_count
*/
MMDEPLOY_API void mmdeploy_video_recognizer_release_result(mmdeploy_video_recognition_t* results,
int* result_count, int video_count);
/**
* @brief destroy video recognizer
* @param[in] recognizer handle of video recognizer created by \ref
* mmdeploy_video_recognizer_create_by_path or \ref mmdeploy_video_recognizer_create
*/
MMDEPLOY_API void mmdeploy_video_recognizer_destroy(mmdeploy_video_recognizer_t recognizer);
/**
* @brief Same as \ref mmdeploy_video_recognizer_create, but allows to control execution context of
* tasks via context
*/
MMDEPLOY_API int mmdeploy_video_recognizer_create_v2(mmdeploy_model_t model,
mmdeploy_context_t context,
mmdeploy_video_recognizer_t* recognizer);
/**
* @brief Pack video recognizer inputs into mmdeploy_value_t
* @param[in] images a batch of videos
* @param[in] video_info video information of each video
* @param[in] video_count number of videos in the batch
* @param[out] value created value
* @return status code of the operation
*/
MMDEPLOY_API int mmdeploy_video_recognizer_create_input(
const mmdeploy_mat_t* images, const mmdeploy_video_sample_info_t* video_info, int video_count,
mmdeploy_value_t* value);
/**
* @brief Apply video recognizer to packed inputs created by \ref mmdeploy_video_recognizer_create_input
* @param[in] recognizer video recognizer's handle
* @param[in] input packed input
* @param[out] output inference output
* @return status code of the operation
*/
MMDEPLOY_API int mmdeploy_video_recognizer_apply_v2(mmdeploy_video_recognizer_t recognizer,
mmdeploy_value_t input,
mmdeploy_value_t* output);
/**
* @brief Unpack video recognizer output into structured results
* @param[in] output inference output
* @param[out] results structured recognition results
* @param[out] result_count number of recognition results for each video
* @return status code of the operation
*/
MMDEPLOY_API int mmdeploy_video_recognizer_get_result(mmdeploy_value_t output,
mmdeploy_video_recognition_t** results,
int** result_count);
#ifdef __cplusplus
}
#endif
#endif // MMDEPLOY_VIDEO_RECOGNIZER_H

View File

@ -0,0 +1,91 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_CSRC_MMDEPLOY_APIS_CXX_VIDEO_RECOGNIZER_HPP_
#define MMDEPLOY_CSRC_MMDEPLOY_APIS_CXX_VIDEO_RECOGNIZER_HPP_
#include "mmdeploy/common.hpp"
#include "mmdeploy/video_recognizer.h"
namespace mmdeploy {
namespace cxx {
using VideoRecognition = mmdeploy_video_recognition_t;
using VideoSampleInfo = mmdeploy_video_sample_info_t;
class VideoRecognizer : public NonMovable {
public:
VideoRecognizer(const Model& model, const Context& context) {
auto ec = mmdeploy_video_recognizer_create_v2(model, context, &recognizer_);
if (ec != MMDEPLOY_SUCCESS) {
throw_exception(static_cast<ErrorCode>(ec));
}
}
~VideoRecognizer() {
if (recognizer_) {
mmdeploy_video_recognizer_destroy(recognizer_);
recognizer_ = {};
}
}
using Result = Result_<VideoRecognition>;
std::vector<Result> Apply(Span<const std::vector<Mat>> videos,
Span<const VideoSampleInfo> infos) {
if (videos.empty()) {
return {};
}
int video_count = videos.size();
VideoRecognition* results{};
int* result_count{};
std::vector<Mat> images;
std::vector<VideoSampleInfo> video_info;
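// Flatten the frames of all videos into one contiguous array; the C API reports how many
// results belong to each video in result_count, which is used below to split them back.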
for (int i = 0; i < videos.size(); i++) {
for (auto& mat : videos[i]) {
images.push_back(mat);
}
video_info.push_back(infos[i]);
}
auto ec =
mmdeploy_video_recognizer_apply(recognizer_, reinterpret(images.data()), video_info.data(),
video_count, &results, &result_count);
if (ec != MMDEPLOY_SUCCESS) {
throw_exception(static_cast<ErrorCode>(ec));
}
std::vector<Result> rets;
rets.reserve(video_count);
std::shared_ptr<VideoRecognition> data(results, [result_count, count = video_count](auto p) {
mmdeploy_video_recognizer_release_result(p, result_count, count);
});
size_t offset = 0;
for (size_t i = 0; i < video_count; ++i) {
offset += rets.emplace_back(offset, result_count[i], data).size();
}
return rets;
}
Result Apply(const std::vector<Mat>& video, const VideoSampleInfo info) {
return Apply(Span{video}, Span{info})[0];
}
private:
mmdeploy_video_recognizer_t recognizer_{};
};
} // namespace cxx
using cxx::VideoRecognition;
using cxx::VideoRecognizer;
using cxx::VideoSampleInfo;
} // namespace mmdeploy
#endif // MMDEPLOY_CSRC_MMDEPLOY_APIS_CXX_VIDEO_RECOGNIZER_HPP_

View File

@ -0,0 +1,88 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/video_recognizer.h"
#include "common.h"
namespace mmdeploy::python {
class PyVideoRecognizer {
public:
PyVideoRecognizer(const char* model_path, const char* device_name, int device_id) {
auto status =
mmdeploy_video_recognizer_create_by_path(model_path, device_name, device_id, &recognizer_);
if (status != MMDEPLOY_SUCCESS) {
throw std::runtime_error("failed to create video_recognizer");
}
}
std::vector<std::vector<std::tuple<int, float>>> Apply(
const std::vector<std::vector<PyImage>>& imgs, const std::vector<std::pair<int, int>>& info) {
if (info.size() != imgs.size()) {
throw std::invalid_argument("the length of info is not equal with imgs");
}
for (int i = 0; i < info.size(); i++) {
if (imgs[i].size() != info[i].first * info[i].second) {
throw std::invalid_argument("invalid info");
}
}
int total = 0;
for (int i = 0; i < imgs.size(); i++) {
total += imgs[i].size();
}
std::vector<mmdeploy_mat_t> clips;
std::vector<mmdeploy_video_sample_info_t> clip_info;
clips.reserve(total);
clip_info.reserve(total);
for (int i = 0; i < imgs.size(); i++) {
for (const auto& img : imgs[i]) {
auto mat = GetMat(img);
clips.push_back(mat);
}
clip_info.push_back({info[i].first, info[i].second});
}
mmdeploy_video_recognition_t* results{};
int* result_count{};
auto status = mmdeploy_video_recognizer_apply(recognizer_, clips.data(), clip_info.data(), 1,
&results, &result_count);
if (status != MMDEPLOY_SUCCESS) {
throw std::runtime_error("failed to apply video_recognizer, code: " + std::to_string(status));
}
auto output = std::vector<std::vector<std::tuple<int, float>>>{};
output.reserve(imgs.size());
auto result_ptr = results;
for (int i = 0; i < imgs.size(); ++i) {
std::vector<std::tuple<int, float>> label_score;
for (int j = 0; j < result_count[i]; ++j) {
label_score.emplace_back(result_ptr[j].label_id, result_ptr[j].score);
}
output.push_back(std::move(label_score));
result_ptr += result_count[i];
}
mmdeploy_video_recognizer_release_result(results, result_count, (int)imgs.size());
return output;
}
~PyVideoRecognizer() {
mmdeploy_video_recognizer_destroy(recognizer_);
recognizer_ = {};
}
private:
mmdeploy_video_recognizer_t recognizer_{};
};
static PythonBindingRegisterer register_video_recognizer{[](py::module& m) {
py::class_<PyVideoRecognizer>(m, "VideoRecognizer")
.def(py::init([](const char* model_path, const char* device_name, int device_id) {
return std::make_unique<PyVideoRecognizer>(model_path, device_name, device_id);
}),
py::arg("model_path"), py::arg("device_name"), py::arg("device_id") = 0)
.def("__call__",
[](PyVideoRecognizer* self, const std::vector<PyImage>& imgs,
const std::pair<int, int>& info) { return self->Apply({imgs}, {info})[0]; })
.def("batch", &PyVideoRecognizer::Apply);
}};
} // namespace mmdeploy::python
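A minimal Python usage sketch for the binding registered above (a sketch only: the model directory and the frame list are placeholder inputs):

from mmdeploy_python import VideoRecognizer

# 'work_dir/tsn' is a placeholder model directory; frames is assumed to be a list of
# clip_len * num_clips BGR frames (numpy uint8 arrays) sampled from one video
recognizer = VideoRecognizer(model_path='work_dir/tsn', device_name='cpu', device_id=0)
results = recognizer(frames, (clip_len, num_clips))
for label_id, score in results:
    print(label_id, score)
# recognizer.batch(frame_lists, info_pairs) handles several videos at once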

View File

@ -11,6 +11,7 @@ if ("all" IN_LIST MMDEPLOY_CODEBASES)
list(APPEND CODEBASES "mmedit")
list(APPEND CODEBASES "mmpose")
list(APPEND CODEBASES "mmrotate")
list(APPEND CODEBASES "mmaction")
else ()
set(CODEBASES ${MMDEPLOY_CODEBASES})
endif ()

View File

@ -0,0 +1,15 @@
# Copyright (c) OpenMMLab. All rights reserved.
project(mmdeploy_mmaction)
file(GLOB SRCS ${CMAKE_CURRENT_SOURCE_DIR} "*.cpp")
mmdeploy_add_module(${PROJECT_NAME} "${SRCS}")
add_subdirectory(cpu)
add_subdirectory(cuda)
target_link_libraries(${PROJECT_NAME} PRIVATE
mmdeploy::transform
mmdeploy_opencv_utils)
add_library(mmdeploy::mmaction ALIAS ${PROJECT_NAME})
set(MMDEPLOY_TASKS ${MMDEPLOY_TASKS} video_recognizer CACHE INTERNAL "")

View File

@ -0,0 +1,70 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include <algorithm>
#include <numeric>
#include "mmdeploy/codebase/mmaction/mmaction.h"
#include "mmdeploy/core/tensor.h"
#include "mmdeploy/core/utils/device_utils.h"
namespace mmdeploy::mmaction {
class BaseHead : public MMAction {
public:
explicit BaseHead(const Value& cfg) : MMAction(cfg) {
if (cfg.contains("params")) {
topk_ = cfg["params"].value("topk", 1);
if (topk_ <= 0) {
MMDEPLOY_ERROR("'topk' should be greater than 0, but got '{}'", topk_);
throw_exception(eInvalidArgument);
}
}
}
Result<Value> operator()(const Value& infer_res) {
MMDEPLOY_DEBUG("infer_res: {}", infer_res);
auto output = infer_res["output"].get<Tensor>();
if (!(output.shape().size() >= 2 && output.data_type() == DataType::kFLOAT)) {
MMDEPLOY_ERROR("unsupported `output` tensor, shape: {}, dtype: {}", output.shape(),
(int)output.data_type());
return Status(eNotSupported);
}
auto class_num = (int)output.shape(1);
OUTCOME_TRY(auto _scores, MakeAvailableOnDevice(output, kHost, stream()));
OUTCOME_TRY(stream().Wait());
return GetLabels(_scores, class_num);
}
private:
Value GetLabels(const Tensor& scores, int class_num) const {
auto scores_data = scores.data<float>();
Labels output;
output.reserve(topk_);
std::vector<int> idx(class_num);
iota(begin(idx), end(idx), 0);
partial_sort(begin(idx), begin(idx) + topk_, end(idx),
[&](int i, int j) { return scores_data[i] > scores_data[j]; });
for (int i = 0; i < topk_; ++i) {
auto label = Label{idx[i], scores_data[idx[i]]};
MMDEPLOY_DEBUG("label_id: {}, score: {}", label.label_id, label.score);
output.push_back(label);
}
return to_value(std::move(output));
}
private:
static constexpr const auto kHost = Device{0};
int topk_{1};
};
REGISTER_CODEBASE_COMPONENT(MMAction, BaseHead);
using SlowFastHead = BaseHead;
REGISTER_CODEBASE_COMPONENT(MMAction, SlowFastHead);
using TSNHead = BaseHead;
REGISTER_CODEBASE_COMPONENT(MMAction, TSNHead);
} // namespace mmdeploy::mmaction

View File

@ -0,0 +1,15 @@
# Copyright (c) OpenMMLab. All rights reserved.
project(mmdeploy_mmaction_cpu_impl CXX)
if ("cpu" IN_LIST MMDEPLOY_TARGET_DEVICES)
add_library(${PROJECT_NAME} OBJECT format_shape_impl.cpp)
set_target_properties(${PROJECT_NAME} PROPERTIES POSITION_INDEPENDENT_CODE 1)
if (NOT (MMDEPLOY_SHARED_LIBS OR MSVC))
target_compile_options(${PROJECT_NAME} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-fvisibility=hidden>)
endif ()
target_link_libraries(${PROJECT_NAME} PRIVATE
mmdeploy::core)
target_link_libraries(mmdeploy_mmaction PRIVATE ${PROJECT_NAME})
mmdeploy_export(${PROJECT_NAME})
endif ()

View File

@ -0,0 +1,138 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/codebase/mmaction/format_shape.h"
#include "mmdeploy/core/utils/device_utils.h"
using namespace std;
namespace mmdeploy {
namespace cpu {
class FormatShapeImpl : public ::mmdeploy::FormatShapeImpl {
public:
explicit FormatShapeImpl(const Value& args) : ::mmdeploy::FormatShapeImpl(args) {}
protected:
Result<Tensor> Format(const std::vector<Tensor>& tensors, int clip_len, int num_clips) {
int N = tensors.size();
int H = tensors[0].shape(1);
int W = tensors[0].shape(2);
int C = tensors[0].shape(3);
std::vector<Tensor> host_tensors;
host_tensors.reserve(N);
for (int i = 0; i < N; i++) {
OUTCOME_TRY(auto src_tensor, MakeAvailableOnDevice(tensors[i], kHost, stream_));
host_tensors.push_back(std::move(src_tensor));
}
OUTCOME_TRY(stream_.Wait());
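// Pack the N HWC frames into one contiguous NHWC tensor on the host before permuting it below.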
TensorDesc desc = {kHost, DataType::kFLOAT, {N, H, W, C}};
Tensor imgs(desc);
int offset = 0;
int n_item = H * W * C;
int copy_size = n_item * sizeof(float);
for (int i = 0; i < N; i++) {
auto src_buffer = host_tensors[i].buffer();
auto dst_buffer = imgs.buffer();
OUTCOME_TRY(stream_.Copy(src_buffer, dst_buffer, copy_size, 0, offset));
offset += copy_size;
}
OUTCOME_TRY(stream_.Wait());
Tensor dst;
if (arg_.input_format == "NCHW") {
OUTCOME_TRY(dst, FormatNCHW(imgs, clip_len, num_clips));
}
if (arg_.input_format == "NCTHW") {
OUTCOME_TRY(dst, FormatNCTHW(imgs, clip_len, num_clips));
}
TensorShape expand_dim = dst.shape();
expand_dim.insert(expand_dim.begin(), 1);
dst.Reshape(expand_dim);
return dst;
}
Result<Tensor> FormatNCHW(Tensor& src, int clip_len, int num_clips) {
int N = src.shape(0);
int H = src.shape(1);
int W = src.shape(2);
int C = src.shape(3);
return Transpose(src, {N, H, W, C}, {0, 3, 1, 2});
};
Result<Tensor> FormatNCTHW(Tensor& src, int clip_len, int num_clips) {
int N = src.shape(0);
int H = src.shape(1);
int W = src.shape(2);
int C = src.shape(3);
int L = clip_len;
if (N % L != 0) {
return Status(eInvalidArgument);
}
int M = N / L;
src.Reshape({M, L, H, W, C});
return Transpose(src, {M, L, H, W, C}, {0, 4, 1, 2, 3});
};
Result<Tensor> Transpose(Tensor& src, const std::vector<int>& src_dims,
const std::vector<int>& permutation) {
Tensor dst(src.desc());
TensorShape shape(src.shape().size());
for (int i = 0; i < shape.size(); i++) {
shape[i] = src.shape(permutation[i]);
}
dst.Reshape(shape);
int ndim = shape.size();
std::vector<int> dst_strides(ndim);
std::vector<int> src_strides(ndim);
dst_strides[ndim - 1] = src_strides[ndim - 1] = 1;
for (int i = ndim - 2; i >= 0; i--) {
dst_strides[i] = dst_strides[i + 1] * shape[i + 1];
src_strides[i] = src_strides[i + 1] * src_dims[i + 1];
}
std::vector<int> tmp(ndim);
for (int i = 0; i < ndim; i++) {
tmp[i] = src_strides[permutation[i]];
}
src_strides.swap(tmp);
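// Walk the destination tensor in row-major order: `coord` acts like an odometer over the output
// shape, and both data pointers advance by their per-dimension strides (rewinding when a digit wraps).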
std::vector<int> coord(ndim, 0);
auto dst_data = dst.data<float>();
auto src_data = src.data<float>();
int i;
do {
dst_data[0] = src_data[0];
for (i = ndim - 1; i >= 0; i--) {
if (++coord[i] == shape[i]) {
coord[i] = 0;
dst_data -= (shape[i] - 1) * dst_strides[i];
src_data -= (shape[i] - 1) * src_strides[i];
} else {
dst_data += dst_strides[i];
src_data += src_strides[i];
break;
}
}
} while (i >= 0);
return dst;
}
constexpr static Device kHost{0, 0};
};
class FormatShapeImplCreator : public Creator<::mmdeploy::FormatShapeImpl> {
public:
const char* GetName() const override { return "cpu"; }
int GetVersion() const override { return 1; }
ReturnType Create(const Value& args) override { return make_unique<FormatShapeImpl>(args); }
};
} // namespace cpu
} // namespace mmdeploy
using ::mmdeploy::FormatShapeImpl;
using ::mmdeploy::cpu::FormatShapeImplCreator;
REGISTER_MODULE(FormatShapeImpl, FormatShapeImplCreator);

View File

@ -0,0 +1,18 @@
# Copyright (c) OpenMMLab. All rights reserved.
if (NOT "cuda" IN_LIST MMDEPLOY_TARGET_DEVICES)
return()
endif ()
project(mmdeploy_mmaction_cuda_impl CXX)
add_library(${PROJECT_NAME} OBJECT format_shape_impl.cpp transpose.cu)
set_target_properties(${PROJECT_NAME} PROPERTIES POSITION_INDEPENDENT_CODE 1)
if (NOT (MMDEPLOY_SHARED_LIBS OR MSVC))
target_compile_options(${PROJECT_NAME} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-fvisibility=hidden>)
endif ()
target_include_directories(${PROJECT_NAME} PRIVATE
${CUDA_INCLUDE_DIRS})
target_link_libraries(${PROJECT_NAME} PRIVATE
mmdeploy::core)
target_link_libraries(mmdeploy_mmaction PRIVATE ${PROJECT_NAME})
mmdeploy_export(${PROJECT_NAME})

View File

@ -0,0 +1,129 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "cuda_runtime.h"
#include "mmdeploy/codebase/mmaction/format_shape.h"
#include "mmdeploy/core/utils/device_utils.h"
using namespace std;
namespace mmdeploy {
namespace cuda {
template <typename T>
void Transpose(const T* src, const int* src_strides, T* dst, const int* dst_strides, int ndim,
int total, cudaStream_t stream);
class FormatShapeImpl : public ::mmdeploy::FormatShapeImpl {
public:
explicit FormatShapeImpl(const Value& args) : ::mmdeploy::FormatShapeImpl(args) {}
protected:
Result<Tensor> Format(const std::vector<Tensor>& tensors, int clip_len, int num_clips) {
int N = tensors.size();
int H = tensors[0].shape(1);
int W = tensors[0].shape(2);
int C = tensors[0].shape(3);
TensorDesc desc = {device_, DataType::kFLOAT, {N, H, W, C}};
Tensor imgs(desc);
int offset = 0;
int n_item = H * W * C;
int copy_size = n_item * sizeof(float);
for (int i = 0; i < N; i++) {
auto src_buffer = tensors[i].buffer();
auto dst_buffer = imgs.buffer();
OUTCOME_TRY(stream_.Copy(src_buffer, dst_buffer, copy_size, 0, offset));
offset += copy_size;
}
Tensor dst;
if (arg_.input_format == "NCHW") {
OUTCOME_TRY(dst, FormatNCHW(imgs, clip_len, num_clips));
}
if (arg_.input_format == "NCTHW") {
OUTCOME_TRY(dst, FormatNCTHW(imgs, clip_len, num_clips));
}
TensorShape expand_dim = dst.shape();
expand_dim.insert(expand_dim.begin(), 1);
dst.Reshape(expand_dim);
return dst;
}
Result<Tensor> FormatNCHW(Tensor& src, int clip_len, int num_clips) {
int N = src.shape(0);
int H = src.shape(1);
int W = src.shape(2);
int C = src.shape(3);
return Transpose(src, {N, H, W, C}, {0, 3, 1, 2});
};
Result<Tensor> FormatNCTHW(Tensor& src, int clip_len, int num_clips) {
int N = src.shape(0);
int H = src.shape(1);
int W = src.shape(2);
int C = src.shape(3);
int L = clip_len;
if (N % L != 0) {
return Status(eInvalidArgument);
}
int M = N / L;
src.Reshape({M, L, H, W, C});
return Transpose(src, {M, L, H, W, C}, {0, 4, 1, 2, 3});
};
Result<Tensor> Transpose(Tensor& src, const std::vector<int>& src_dims,
const std::vector<int>& permutation) {
Tensor dst(src.desc());
TensorShape shape(src.shape().size());
for (int i = 0; i < shape.size(); i++) {
shape[i] = src.shape(permutation[i]);
}
dst.Reshape(shape);
int ndim = src_dims.size();
std::vector<int> dst_dims(ndim);
for (int i = 0; i < ndim; i++) {
dst_dims[i] = src_dims[permutation[i]];
}
std::vector<int> src_strides(ndim);
std::vector<int> dst_strides(ndim);
std::vector<int> buffer(ndim);
buffer.back() = 1;
dst_strides.back() = 1;
for (int i = ndim - 1; i > 0; i--) {
buffer[i - 1] = buffer[i] * src_dims[i];
dst_strides[i - 1] = dst_strides[i] * dst_dims[i];
}
for (int i = 0; i < ndim; ++i) {
src_strides[i] = buffer[permutation[i]];
}
Buffer _src_strides(Device("cuda"), sizeof(int) * ndim);
Buffer _dst_strides(Device("cuda"), sizeof(int) * ndim);
OUTCOME_TRY(stream_.Copy(src_strides.data(), _src_strides));
OUTCOME_TRY(stream_.Copy(dst_strides.data(), _dst_strides));
::mmdeploy::cuda::Transpose(src.data<float>(), GetNative<int*>(_src_strides), dst.data<float>(),
GetNative<int*>(_dst_strides), ndim, src.size(),
(cudaStream_t)stream_.GetNative());
return dst;
}
};
class FormatShapeImplCreator : public Creator<::mmdeploy::FormatShapeImpl> {
public:
const char* GetName() const override { return "cuda"; }
int GetVersion() const override { return 1; }
ReturnType Create(const Value& args) override { return make_unique<FormatShapeImpl>(args); }
};
} // namespace cuda
} // namespace mmdeploy
using ::mmdeploy::FormatShapeImpl;
using ::mmdeploy::cuda::FormatShapeImplCreator;
REGISTER_MODULE(FormatShapeImpl, FormatShapeImplCreator);

View File

@ -0,0 +1,38 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include <stdint.h>
#include <stdio.h>
namespace mmdeploy {
namespace cuda {
template <typename T>
__global__ void transpose(const T* src, const int* src_strides, T* dst, const int* dst_strides,
int ndim, int total) {
int u = blockIdx.x * blockDim.x + threadIdx.x;
if (u >= total) return;
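// Map the linear destination index u to its source index v: peel off one coordinate per
// dimension with dst_strides and re-accumulate it with src_strides.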
int remaining = u;
int v = 0;
for (int i = 0; i < ndim; i++) {
int p = remaining / dst_strides[i];
remaining -= p * dst_strides[i];
v += p * src_strides[i];
}
dst[u] = src[v];
}
template <typename T>
void Transpose(const T* src, const int* src_strides, T* dst, const int* dst_strides, int ndim,
int total, cudaStream_t stream) {
int thread_num = 256;
int block_num = (total + thread_num - 1) / thread_num;
transpose<T>
<<<block_num, thread_num, 0, stream>>>(src, src_strides, dst, dst_strides, ndim, total);
}
template void Transpose<float>(const float* src, const int* src_strides, float* dst,
const int* dst_strides, int ndim, int total, cudaStream_t stream);
} // namespace cuda
} // namespace mmdeploy

View File

@ -0,0 +1,89 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/codebase/mmaction/format_shape.h"
#include "mmdeploy/archive/json_archive.h"
#include "mmdeploy/core/utils/device_utils.h"
using namespace std;
namespace mmdeploy {
FormatShapeImpl::FormatShapeImpl(const Value& args) : TransformImpl(args) {
arg_.input_format = args.value("input_format", std::string(""));
if (arg_.input_format != "NCHW" && arg_.input_format != "NCTHW") {
throw std::domain_error("'input_format' should be 'NCHW' or 'NCTHW'");
}
}
Result<Value> FormatShapeImpl::Process(const Value& input) {
MMDEPLOY_DEBUG("input: {}", to_json(input).dump(2));
if (!input.is_array()) {
MMDEPLOY_ERROR("input of format shape should be array");
return Status(eInvalidArgument);
}
if (!(input[0].contains("img") || input[0].contains("img"))) {
MMDEPLOY_ERROR("input should contains imgs or img");
return Status(eInvalidArgument);
}
int n_image = input.size();
int clip_len = input[0]["clip_len"].get<int>();
int num_clips = input[0]["num_clips"].get<int>();
std::vector<Tensor> images;
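// Multi-crop transforms (ThreeCrop/TenCrop) store several crops per frame under "imgs";
// gather them crop-major so that all frames belonging to one crop stay contiguous.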
if (input[0].contains("imgs")) {
int n_crop = input[0]["imgs"].size();
int total = n_image * n_crop;
images.reserve(total);
for (int i = 0; i < n_crop; i++) {
for (int j = 0; j < n_image; j++) {
images.push_back(input[j]["imgs"][i].get<Tensor>());
}
}
} else if (input[0].contains("img")) {
images.reserve(n_image);
for (int i = 0; i < n_image; i++) {
images.push_back(input[i]["img"].get<Tensor>());
}
}
Value output;
OUTCOME_TRY(auto img, Format(images, clip_len, num_clips));
SetTransformData(output, "img", std::move(img));
return output;
}
class FormatShape : public Transform {
public:
explicit FormatShape(const Value& args, int version = 0) : Transform(args) {
auto impl_creator = Registry<FormatShapeImpl>::Get().GetCreator(specified_platform_, version);
if (nullptr == impl_creator) {
MMDEPLOY_ERROR("'FormatShape' is not supported on '{}' platform", specified_platform_);
throw std::domain_error("'FormatShape' is not supported on specified platform");
}
impl_ = impl_creator->Create(args);
}
~FormatShape() override = default;
Result<Value> Process(const Value& input) override { return impl_->Process(input); }
protected:
std::unique_ptr<FormatShapeImpl> impl_;
};
class FormatShapeCreator : public Creator<Transform> {
public:
const char* GetName(void) const override { return "FormatShape"; }
int GetVersion(void) const override { return version_; }
ReturnType Create(const Value& args) override { return make_unique<FormatShape>(args, version_); }
private:
int version_{1};
};
REGISTER_MODULE(Transform, FormatShapeCreator);
MMDEPLOY_DEFINE_REGISTRY(FormatShapeImpl);
} // namespace mmdeploy

View File

@ -0,0 +1,37 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_SRC_CODEBASE_MMACTION_FORMAT_SHAPE_H_
#define MMDEPLOY_SRC_CODEBASE_MMACTION_FORMAT_SHAPE_H_
#include <array>
#include <vector>
#include "mmdeploy/core/tensor.h"
#include "mmdeploy/preprocess/transform/transform.h"
namespace mmdeploy {
class FormatShapeImpl : public TransformImpl {
public:
explicit FormatShapeImpl(const Value& args);
~FormatShapeImpl() override = default;
Result<Value> Process(const Value& input) override;
protected:
virtual Result<Tensor> Format(const std::vector<Tensor>& tensors, int clip_len,
int num_clips) = 0;
protected:
struct format_shape_arg_t {
std::string input_format;
};
using ArgType = struct format_shape_arg_t;
ArgType arg_;
};
MMDEPLOY_DECLARE_REGISTRY(FormatShapeImpl);
} // namespace mmdeploy
#endif

View File

@ -0,0 +1,13 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/codebase/mmaction/mmaction.h"
namespace mmdeploy {
namespace mmaction {
REGISTER_CODEBASE(MMAction);
}
MMDEPLOY_DEFINE_REGISTRY(mmaction::MMAction);
} // namespace mmdeploy

View File

@ -0,0 +1,28 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_SRC_CODEBASE_MMACTION_MMACTION_H_
#define MMDEPLOY_SRC_CODEBASE_MMACTION_MMACTION_H_
#include "mmdeploy/codebase/common.h"
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/module.h"
#include "mmdeploy/core/serialization.h"
namespace mmdeploy {
namespace mmaction {
struct Label {
int label_id;
float score;
MMDEPLOY_ARCHIVE_MEMBERS(label_id, score);
};
using Labels = std::vector<Label>;
DECLARE_CODEBASE(MMAction, mmaction);
} // namespace mmaction
MMDEPLOY_DECLARE_REGISTRY(mmaction::MMAction);
} // namespace mmdeploy
#endif // MMDEPLOY_SRC_CODEBASE_MMACTION_MMACTION_H_

View File

@ -5,6 +5,9 @@ project(mmdeploy_cpu_transform_impl)
set(SRCS
collect_impl.cpp
crop_impl.cpp
ten_crop_impl.cpp
three_crop_impl.cpp
crop_utils.cpp
image2tensor_impl.cpp
default_format_bundle_impl.cpp
load_impl.cpp

View File

@ -0,0 +1,24 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/preprocess/transform/crop.h"
#include "mmdeploy/utils/opencv/opencv_utils.h"
using namespace std;
namespace mmdeploy {
namespace cpu {
Result<Tensor> CropImage(Stream& stream, const Device& device, const Tensor& tensor, int top,
int left, int bottom, int right) {
OUTCOME_TRY(auto src_tensor, MakeAvailableOnDevice(tensor, device, stream));
SyncOnScopeExit(stream, src_tensor.buffer() != tensor.buffer(), src_tensor);
cv::Mat mat = Tensor2CVMat(src_tensor);
cv::Mat cropped_mat = Crop(mat, top, left, bottom, right);
return CVMat2Tensor(cropped_mat);
}
} // namespace cpu
} // namespace mmdeploy

View File

@ -0,0 +1,47 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/preprocess/transform/ten_crop.h"
#include "opencv_utils.h"
using namespace std;
namespace mmdeploy {
namespace cpu {
Result<Tensor> CropImage(Stream& stream, const Device& device, const Tensor& tensor, int top,
int left, int bottom, int right);
class TenCropImpl : public ::mmdeploy::TenCropImpl {
public:
explicit TenCropImpl(const Value& args) : ::mmdeploy::TenCropImpl(args) {}
protected:
Result<Tensor> CropImage(const Tensor& tensor, int top, int left, int bottom,
int right) override {
return ::mmdeploy::cpu::CropImage(stream_, device_, tensor, top, left, bottom, right);
}
Result<Tensor> HorizontalFlip(const Tensor& tensor) {
OUTCOME_TRY(auto src_tensor, MakeAvailableOnDevice(tensor, device_, stream_));
SyncOnScopeExit(stream_, src_tensor.buffer() != tensor.buffer(), src_tensor);
cv::Mat mat = Tensor2CVMat(src_tensor);
cv::Mat flipped_mat;
cv::flip(mat, flipped_mat, 1);
return CVMat2Tensor(flipped_mat);
}
};
class TenCropImplCreator : public Creator<::mmdeploy::TenCropImpl> {
public:
const char* GetName() const override { return "cpu"; }
int GetVersion() const override { return 1; }
ReturnType Create(const Value& args) override { return make_unique<TenCropImpl>(args); }
};
} // namespace cpu
} // namespace mmdeploy
using ::mmdeploy::TenCropImpl;
using ::mmdeploy::cpu::TenCropImplCreator;
REGISTER_MODULE(TenCropImpl, TenCropImplCreator);

View File

@ -0,0 +1,38 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/preprocess/transform/three_crop.h"
#include "opencv_utils.h"
using namespace std;
namespace mmdeploy {
namespace cpu {
Result<Tensor> CropImage(Stream& stream, const Device& device, const Tensor& tensor, int top,
int left, int bottom, int right);
class ThreeCropImpl : public ::mmdeploy::ThreeCropImpl {
public:
explicit ThreeCropImpl(const Value& args) : ::mmdeploy::ThreeCropImpl(args) {}
protected:
Result<Tensor> CropImage(const Tensor& tensor, int top, int left, int bottom,
int right) override {
return ::mmdeploy::cpu::CropImage(stream_, device_, tensor, top, left, bottom, right);
}
};
class ThreeCropImplCreator : public Creator<::mmdeploy::ThreeCropImpl> {
public:
const char* GetName() const override { return "cpu"; }
int GetVersion() const override { return 1; }
ReturnType Create(const Value& args) override { return make_unique<ThreeCropImpl>(args); }
};
} // namespace cpu
} // namespace mmdeploy
using ::mmdeploy::ThreeCropImpl;
using ::mmdeploy::cpu::ThreeCropImplCreator;
REGISTER_MODULE(ThreeCropImpl, ThreeCropImplCreator);

View File

@ -7,6 +7,9 @@ find_package(pplcv REQUIRED)
set(SRCS
collect_impl.cpp
crop_impl.cpp
three_crop_impl.cpp
ten_crop_impl.cpp
crop_utils.cpp
image2tensor_impl.cpp
default_format_bundle_impl.cpp
load_impl.cpp

View File

@ -0,0 +1,66 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include <cuda_runtime.h>
#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/preprocess/transform/crop.h"
using namespace std;
namespace mmdeploy {
namespace cuda {
template <typename T, int channels>
void Crop(const T* src, int src_w, T* dst, int dst_h, int dst_w, int offset_h, int offset_w,
cudaStream_t stream);
Result<Tensor> CropImage(Stream& _stream, const Device& device, const Tensor& tensor, int top,
int left, int bottom, int right) {
OUTCOME_TRY(auto device_tensor, MakeAvailableOnDevice(tensor, device, _stream));
SyncOnScopeExit sync(_stream, device_tensor.buffer() != tensor.buffer(), device_tensor);
auto stream = GetNative<cudaStream_t>(_stream);
auto desc = device_tensor.desc();
int h = bottom - top + 1;
int w = right - left + 1;
int c = desc.shape[3];
auto type = desc.data_type;
TensorShape shape{1, bottom - top + 1, right - left + 1, tensor.desc().shape[3]};
TensorDesc dst_desc{device, tensor.desc().data_type, shape, desc.name};
Tensor dst_tensor{dst_desc};
assert(device.is_device());
if (DataType::kINT8 == type) {
uint8_t* input = device_tensor.data<uint8_t>();
uint8_t* output = dst_tensor.data<uint8_t>();
if (3 == c) {
Crop<uint8_t, 3>(input, desc.shape[2], output, h, w, top, left, stream);
} else if (1 == c) {
Crop<uint8_t, 1>(input, desc.shape[2], output, h, w, top, left, stream);
} else {
MMDEPLOY_ERROR("unsupported channels {}", c);
return Status(eNotSupported);
}
} else if (DataType::kFLOAT == type) {
float* input = static_cast<float*>(device_tensor.buffer().GetNative());
float* output = static_cast<float*>(dst_tensor.buffer().GetNative());
if (3 == c) {
Crop<float, 3>(input, desc.shape[2], output, h, w, top, left, stream);
} else if (1 == c) {
Crop<float, 1>(input, desc.shape[2], output, h, w, top, left, stream);
} else {
MMDEPLOY_ERROR("unsupported channels {}", c);
return Status(eNotSupported);
}
} else {
MMDEPLOY_ERROR("unsupported channels {}", c);
return Status(eNotSupported);
}
return dst_tensor;
}
} // namespace cuda
} // namespace mmdeploy

View File

@ -0,0 +1,89 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include <cuda_runtime.h>
#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/preprocess/transform/ten_crop.h"
#include "ppl/cv/cuda/flip.h"
using namespace std;
namespace mmdeploy {
namespace cuda {
Result<Tensor> CropImage(Stream& stream, const Device& device, const Tensor& tensor, int top,
int left, int bottom, int right);
class TenCropImpl : public ::mmdeploy::TenCropImpl {
public:
explicit TenCropImpl(const Value& args) : ::mmdeploy::TenCropImpl(args) {}
protected:
Result<Tensor> CropImage(const Tensor& tensor, int top, int left, int bottom,
int right) override {
return ::mmdeploy::cuda::CropImage(stream_, device_, tensor, top, left, bottom, right);
}
Result<Tensor> HorizontalFlip(const Tensor& tensor) {
OUTCOME_TRY(auto src_tensor, MakeAvailableOnDevice(tensor, device_, stream_));
SyncOnScopeExit sync(stream_, src_tensor.buffer() != tensor.buffer(), src_tensor);
TensorDesc dst_desc = tensor.desc();
dst_desc.device = device_;
Tensor dst_tensor(dst_desc);
auto stream = GetNative<cudaStream_t>(stream_);
int h = (int)tensor.shape(1);
int w = (int)tensor.shape(2);
int c = (int)tensor.shape(3);
ppl::common::RetCode ret;
if (tensor.data_type() == DataType::kINT8) {
auto input = tensor.data<uint8_t>();
auto output = dst_tensor.data<uint8_t>();
if (c == 1) {
ret = ppl::cv::cuda::Flip<uint8_t, 1>(stream, h, w, w * c, input, w * c, output, 1);
} else if (c == 3) {
ret = ppl::cv::cuda::Flip<uint8_t, 3>(stream, h, w, w * c, input, w * c, output, 1);
} else {
ret = ppl::common::RC_UNSUPPORTED;
}
} else if (tensor.data_type() == DataType::kFLOAT) {
auto input = tensor.data<float>();
auto output = dst_tensor.data<float>();
if (c == 1) {
ret = ppl::cv::cuda::Flip<float, 1>(stream, h, w, w * c, input, w * c, output, 1);
} else if (c == 3) {
ret = ppl::cv::cuda::Flip<float, 3>(stream, h, w, w * c, input, w * c, output, 1);
} else {
ret = ppl::common::RC_UNSUPPORTED;
}
} else {
MMDEPLOY_ERROR("unsupported data type {}", tensor.data_type());
return Status(eNotSupported);
}
if (ret != 0) {
return Status(eFail);
}
return dst_tensor;
}
};
class TenCropImplCreator : public Creator<::mmdeploy::TenCropImpl> {
public:
const char* GetName() const override { return "cuda"; }
int GetVersion() const override { return 1; }
ReturnType Create(const Value& args) override { return make_unique<TenCropImpl>(args); }
private:
int version_{1};
};
} // namespace cuda
} // namespace mmdeploy
using ::mmdeploy::TenCropImpl;
using ::mmdeploy::cuda::TenCropImplCreator;
REGISTER_MODULE(TenCropImpl, TenCropImplCreator);

View File

@ -0,0 +1,42 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include <cuda_runtime.h>
#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/preprocess/transform/three_crop.h"
using namespace std;
namespace mmdeploy {
namespace cuda {
Result<Tensor> CropImage(Stream& stream, const Device& device, const Tensor& tensor, int top,
int left, int bottom, int right);
class ThreeCropImpl : public ::mmdeploy::ThreeCropImpl {
public:
explicit ThreeCropImpl(const Value& args) : ::mmdeploy::ThreeCropImpl(args) {}
protected:
Result<Tensor> CropImage(const Tensor& tensor, int top, int left, int bottom,
int right) override {
return ::mmdeploy::cuda::CropImage(stream_, device_, tensor, top, left, bottom, right);
}
};
class ThreeCropImplCreator : public Creator<::mmdeploy::ThreeCropImpl> {
public:
const char* GetName() const override { return "cuda"; }
int GetVersion() const override { return 1; }
ReturnType Create(const Value& args) override { return make_unique<ThreeCropImpl>(args); }
private:
int version_{1};
};
} // namespace cuda
} // namespace mmdeploy
using ::mmdeploy::ThreeCropImpl;
using ::mmdeploy::cuda::ThreeCropImplCreator;
REGISTER_MODULE(ThreeCropImpl, ThreeCropImplCreator);

View File

@ -6,6 +6,8 @@ set(SRCS
collect.cpp
compose.cpp
crop.cpp
three_crop.cpp
ten_crop.cpp
image2tensor.cpp
default_format_bundle.cpp
load.cpp
@ -13,7 +15,8 @@ set(SRCS
pad.cpp
resize.cpp
transform.cpp
tracer.cpp)
tracer.cpp
lift.cpp)
mmdeploy_add_module(${PROJECT_NAME} LIBRARY "${SRCS}")
target_include_directories(
${PROJECT_NAME} PUBLIC $<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}/preprocess>)

View File

@ -8,6 +8,21 @@
namespace mmdeploy {
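// A transform step may now return either a single sample (an object) or, via the Lift transform,
// an array of samples; collect the intermediate "__data__" tensors from both layouts so that
// Compose keeps them alive until the stream is synchronized.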
void SaveIntermediates(Value& value, Value::Array& intermediates) {
if (value.is_array()) {
for (auto& inner : value) {
if (auto it = inner.find("__data__"); it != inner.end()) {
std::move(it->begin(), it->end(), std::back_inserter(intermediates));
it->array().clear();
}
}
} else if (value.is_object()) {
if (auto it = value.find("__data__"); it != value.end()) {
std::move(it->begin(), it->end(), std::back_inserter(intermediates));
it->array().clear();
}
}
}
Compose::Compose(const Value& args, int version) : Transform(args) {
assert(args.contains("context"));
@ -44,10 +59,7 @@ Result<Value> Compose::Process(const Value& input) {
Value::Array intermediates;
for (auto& transform : transforms_) {
OUTCOME_TRY(auto t, transform->Process(output));
if (auto it = t.find("__data__"); it != t.end()) {
std::move(it->begin(), it->end(), std::back_inserter(intermediates));
it->array().clear();
}
SaveIntermediates(t, intermediates);
output = std::move(t);
}
OUTCOME_TRY(stream_.Wait());

View File

@ -0,0 +1,42 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/preprocess/transform/lift.h"
#include "mmdeploy/archive/json_archive.h"
#include "mmdeploy/archive/value_archive.h"
#include "mmdeploy/core/utils/formatter.h"
namespace mmdeploy {
Lift::Lift(const Value& args, int version) : Transform(args) {
std::string type = "Compose";
auto creator = Registry<Transform>::Get().GetCreator(type, version);
if (!creator) {
MMDEPLOY_ERROR("Unable to find Transform creator: {}. Available transforms: {}", type,
Registry<Transform>::Get().List());
throw_exception(eEntryNotFound);
}
compose_ = creator->Create(args);
}
Result<Value> Lift::Process(const Value& input) {
Value output;
for (int i = 0; i < input.size(); i++) {
Value single = input[i];
OUTCOME_TRY(auto t, compose_->Process(single));
output.push_back(std::move(t));
}
return std::move(output);
}
class LiftCreator : public Creator<Transform> {
public:
const char* GetName() const override { return "Lift"; }
int GetVersion() const override { return version_; }
ReturnType Create(const Value& args) override { return std::make_unique<Lift>(args, version_); }
private:
int version_{1};
};
REGISTER_MODULE(Transform, LiftCreator);
} // namespace mmdeploy

View File

@ -0,0 +1,23 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_SRC_PREPROCESS_TRANSFORM_LIFT_H_
#define MMDEPLOY_SRC_PREPROCESS_TRANSFORM_LIFT_H_
#include "mmdeploy/preprocess/transform/transform.h"
namespace mmdeploy {
class MMDEPLOY_API Lift : public Transform {
public:
explicit Lift(const Value& args, int version = 0);
~Lift() override = default;
Result<Value> Process(const Value& input) override;
private:
std::unique_ptr<Transform> compose_;
};
} // namespace mmdeploy
#endif  // MMDEPLOY_SRC_PREPROCESS_TRANSFORM_LIFT_H_

View File

@ -0,0 +1,90 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/preprocess/transform/ten_crop.h"
#include "mmdeploy/archive/json_archive.h"
using namespace std;
namespace mmdeploy {
TenCropImpl::TenCropImpl(const Value& args) : TransformImpl(args) {
// (w, h) of crop size
if (!args.contains(("crop_size"))) {
throw std::invalid_argument("'crop_size' is expected");
}
if (args["crop_size"].is_number_integer()) {
int crop_size = args["crop_size"].get<int>();
arg_.crop_size[0] = arg_.crop_size[1] = crop_size;
} else if (args["crop_size"].is_array() && args["crop_size"].size() == 2) {
arg_.crop_size[0] = args["crop_size"][0].get<int>();
arg_.crop_size[1] = args["crop_size"][1].get<int>();
} else {
throw std::invalid_argument("'crop_size' should be integer or an int array of size 2");
}
}
Result<Value> TenCropImpl::Process(const Value& input) {
MMDEPLOY_DEBUG("input: {}", to_json(input).dump(2));
// copy input data, and update its properties
Value output = input;
auto tensor = input["img"].get<Tensor>();
int img_h = tensor.shape(1);
int img_w = tensor.shape(2);
int crop_w = arg_.crop_size[0];
int crop_h = arg_.crop_size[1];
int w_step = (img_w - crop_w) / 4;
int h_step = (img_h - crop_h) / 4;
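// Five crop origins: the four corners plus the center; each crop together with its horizontal
// flip yields the ten crops.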
std::array<std::pair<int, int>, 5> offsets = {{{0, 0},
{4 * w_step, 0},
{0, 4 * h_step},
{4 * w_step, 4 * h_step},
{2 * w_step, 2 * h_step}}};
vector<Tensor> cropped;
cropped.reserve(10);
for (const auto& [offx, offy] : offsets) {
int y1 = offy;
int y2 = offy + crop_h - 1;
int x1 = offx;
int x2 = offx + crop_w - 1;
OUTCOME_TRY(auto cropped_tensor, CropImage(tensor, y1, x1, y2, x2));
OUTCOME_TRY(auto flipped_tensor, HorizontalFlip(cropped_tensor));
cropped.push_back(std::move(cropped_tensor));
cropped.push_back(std::move(flipped_tensor));
}
output["imgs"] = Value{};
for (int i = 0; i < cropped.size(); i++) {
output["imgs"].push_back(cropped[i]);
output["__data__"].push_back(std::move(cropped[i]));
}
return output;
}
TenCrop::TenCrop(const Value& args, int version) : Transform(args) {
auto impl_creator = Registry<TenCropImpl>::Get().GetCreator(specified_platform_, version);
if (nullptr == impl_creator) {
MMDEPLOY_ERROR("'TenCrop' is not supported on '{}' platform", specified_platform_);
throw std::domain_error("'Resize' is not supported on specified platform");
}
impl_ = impl_creator->Create(args);
}
class TenCropCreator : public Creator<Transform> {
public:
const char* GetName(void) const override { return "TenCrop"; }
int GetVersion(void) const override { return version_; }
ReturnType Create(const Value& args) override {
return std::make_unique<TenCrop>(args, version_);
}
private:
int version_{1};
};
REGISTER_MODULE(Transform, TenCropCreator);
MMDEPLOY_DEFINE_REGISTRY(TenCropImpl);
} // namespace mmdeploy

View File

@ -0,0 +1,49 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_TEN_CROP_H
#define MMDEPLOY_TEN_CROP_H
#include <array>
#include "mmdeploy/core/tensor.h"
#include "transform.h"
namespace mmdeploy {
class MMDEPLOY_API TenCropImpl : public TransformImpl {
public:
explicit TenCropImpl(const Value& args);
~TenCropImpl() override = default;
Result<Value> Process(const Value& input) override;
protected:
virtual Result<Tensor> CropImage(const Tensor& tensor, int top, int left, int bottom,
int right) = 0;
virtual Result<Tensor> HorizontalFlip(const Tensor& tensor) = 0;
protected:
struct ten_crop_arg_t {
std::array<int, 2> crop_size;
};
using ArgType = struct ten_crop_arg_t;
protected:
ArgType arg_;
};
class MMDEPLOY_API TenCrop : public Transform {
public:
explicit TenCrop(const Value& args, int version = 0);
~TenCrop() override = default;
Result<Value> Process(const Value& input) override { return impl_->Process(input); }
protected:
std::unique_ptr<TenCropImpl> impl_;
};
MMDEPLOY_DECLARE_REGISTRY(TenCropImpl);
} // namespace mmdeploy
#endif

View File

@ -0,0 +1,101 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/preprocess/transform/three_crop.h"
#include "mmdeploy/archive/json_archive.h"
using namespace std;
namespace mmdeploy {
Result<void> check_input_shape(int img_h, int img_w, int crop_h, int crop_w) {
if (img_h == crop_h || img_w == crop_w) {
return success();
}
MMDEPLOY_ERROR("ThreeCrop error, img_h: {} != crop_h: {} && img_w: {} != crop_w {}", img_h,
crop_h, img_w, crop_w);
return Status(eInvalidArgument);
}
ThreeCropImpl::ThreeCropImpl(const Value& args) : TransformImpl(args) {
// (w, h) of crop size
if (!args.contains(("crop_size"))) {
throw std::invalid_argument("'crop_size' is expected");
}
if (args["crop_size"].is_number_integer()) {
int crop_size = args["crop_size"].get<int>();
arg_.crop_size[0] = arg_.crop_size[1] = crop_size;
} else if (args["crop_size"].is_array() && args["crop_size"].size() == 2) {
arg_.crop_size[0] = args["crop_size"][0].get<int>();
arg_.crop_size[1] = args["crop_size"][1].get<int>();
} else {
throw std::invalid_argument("'crop_size' should be integer or an int array of size 2");
}
}
Result<Value> ThreeCropImpl::Process(const Value& input) {
MMDEPLOY_DEBUG("input: {}", to_json(input).dump(2));
// copy input data, and update its properties
Value output = input;
auto tensor = input["img"].get<Tensor>();
auto desc = tensor.desc();
int img_h = desc.shape[1];
int img_w = desc.shape[2];
int crop_w = arg_.crop_size[0];
int crop_h = arg_.crop_size[1];
OUTCOME_TRY(check_input_shape(img_h, img_w, crop_h, crop_w));
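// Take three crops along the dimension that exceeds the crop size: both ends and the center.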
std::array<std::pair<int, int>, 3> offsets;
if (crop_h == img_h) {
int w_step = (img_w - crop_w) / 2;
offsets = {{{0, 0}, {2 * w_step, 0}, {w_step, 0}}};
} else if (crop_w == img_w) {
int h_step = (img_h - crop_h) / 2;
offsets = {{{0, 0}, {0, 2 * h_step}, {0, h_step}}};
}
vector<Tensor> cropped;
cropped.reserve(3);
for (const auto& [offx, offy] : offsets) {
int y1 = offy;
int y2 = offy + crop_h - 1;
int x1 = offx;
int x2 = offx + crop_w - 1;
OUTCOME_TRY(auto dst_tensor, CropImage(tensor, y1, x1, y2, x2));
cropped.push_back(std::move(dst_tensor));
}
output["imgs"] = Value{};
for (int i = 0; i < cropped.size(); i++) {
output["imgs"].push_back(cropped[i]);
output["__data__"].push_back(std::move(cropped[i]));
}
return output;
}
ThreeCrop::ThreeCrop(const Value& args, int version) : Transform(args) {
auto impl_creator = Registry<ThreeCropImpl>::Get().GetCreator(specified_platform_, version);
if (nullptr == impl_creator) {
MMDEPLOY_ERROR("'ThreeCrop' is not supported on '{}' platform", specified_platform_);
throw std::domain_error("'Resize' is not supported on specified platform");
}
impl_ = impl_creator->Create(args);
}
class ThreeCropCreator : public Creator<Transform> {
public:
const char* GetName(void) const override { return "ThreeCrop"; }
int GetVersion(void) const override { return version_; }
ReturnType Create(const Value& args) override {
return std::make_unique<ThreeCrop>(args, version_);
}
private:
int version_{1};
};
REGISTER_MODULE(Transform, ThreeCropCreator);
MMDEPLOY_DEFINE_REGISTRY(ThreeCropImpl);
} // namespace mmdeploy

View File

@ -0,0 +1,48 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_THREE_CROP_H
#define MMDEPLOY_THREE_CROP_H
#include <array>
#include "mmdeploy/core/tensor.h"
#include "transform.h"
namespace mmdeploy {
class MMDEPLOY_API ThreeCropImpl : public TransformImpl {
public:
explicit ThreeCropImpl(const Value& args);
~ThreeCropImpl() override = default;
Result<Value> Process(const Value& input) override;
protected:
virtual Result<Tensor> CropImage(const Tensor& tensor, int top, int left, int bottom,
int right) = 0;
protected:
struct three_crop_arg_t {
std::array<int, 2> crop_size;
};
using ArgType = struct three_crop_arg_t;
protected:
ArgType arg_;
};
class MMDEPLOY_API ThreeCrop : public Transform {
public:
explicit ThreeCrop(const Value& args, int version = 0);
~ThreeCrop() override = default;
Result<Value> Process(const Value& input) override { return impl_->Process(input); }
protected:
std::unique_ptr<ThreeCropImpl> impl_;
};
MMDEPLOY_DECLARE_REGISTRY(ThreeCropImpl);
} // namespace mmdeploy
#endif

View File

@ -38,6 +38,7 @@ add_example(restorer c image_restorer)
add_example(text_detector c ocr)
add_example(pose_detector c pose_detection)
add_example(rotated_detector c rotated_object_detection)
add_example(video_recognizer c video_recognition)
# TODO: figure out a better way
#add_example("" c det_cls)
#add_example("" c det_pose)
@ -52,4 +53,5 @@ if (MMDEPLOY_BUILD_SDK_CXX_API)
add_example(pose_detector cpp pose_detector)
add_example(rotated_detector cpp rotated_detector)
add_example(pose_detector cpp pose_tracker)
add_example(video_recognizer cpp video_cls)
endif ()

View File

@ -0,0 +1,111 @@
#include <algorithm>
#include <fstream>
#include <map>
#include <opencv2/imgcodecs/imgcodecs.hpp>
#include <opencv2/videoio.hpp>
#include <set>
#include <string>
#include <vector>
#include "mmdeploy/video_recognizer.h"
void SampleFrames(const char* video_path, std::map<int, cv::Mat>& buffer,
std::vector<mmdeploy_mat_t>& clips, int clip_len, int frame_interval = 1,
int num_clips = 1) {
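// Roughly mirrors mmaction2's test-mode SampleFrames: split the video into num_clips segments,
// take clip_len frames per segment spaced by frame_interval, and decode only the unique frames.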
cv::VideoCapture cap = cv::VideoCapture(video_path);
if (!cap.isOpened()) {
fprintf(stderr, "failed to load video: %s\n", video_path);
exit(1);
}
int num_frames = cap.get(cv::CAP_PROP_FRAME_COUNT);
printf("num_frames %d\n", num_frames);
int ori_clip_len = clip_len * frame_interval;
float avg_interval = (num_frames - ori_clip_len + 1.f) / num_clips;
std::vector<int> frame_inds;
for (int i = 0; i < num_clips; i++) {
int clip_offset = i * avg_interval + avg_interval / 2.0;
for (int j = 0; j < clip_len; j++) {
int ind = (j * frame_interval + clip_offset) % num_frames;
if (num_frames <= ori_clip_len - 1) {
ind = j % num_frames;
}
frame_inds.push_back(ind);
}
}
std::vector<int> unique_inds(frame_inds.begin(), frame_inds.end());
std::sort(unique_inds.begin(), unique_inds.end());
auto last = std::unique(unique_inds.begin(), unique_inds.end());
unique_inds.erase(last, unique_inds.end());
int ind = 0;
for (int i = 0; i < unique_inds.size(); i++) {
int tid = unique_inds[i];
cv::Mat frame;
while (ind < tid) {
cap.read(frame);
ind++;
}
cap.read(frame);
buffer[tid] = frame;
ind++;
}
clips.resize(frame_inds.size());
for (int i = 0; i < frame_inds.size(); i++) {
auto& img = buffer[frame_inds[i]];
mmdeploy_mat_t mat{
img.data, img.rows, img.cols, 3, MMDEPLOY_PIXEL_FORMAT_BGR, MMDEPLOY_DATA_TYPE_UINT8};
clips[i] = mat;
}
}
int main(int argc, char* argv[]) {
if (argc != 7) {
fprintf(stderr,
"usage:\n video_recognition device_name dump_model_directory video_path clip_len "
"frame_interval num_clips \n");
return 1;
}
auto device_name = argv[1];
auto model_path = argv[2];
auto video_path = argv[3];
int clip_len = std::stoi(argv[4]);
int frame_interval = std::stoi(argv[5]);
int num_clips = std::stoi(argv[6]);
std::map<int, cv::Mat> buffer;
std::vector<mmdeploy_mat_t> clips;
std::vector<mmdeploy_video_sample_info_t> clip_info;
SampleFrames(video_path, buffer, clips, clip_len, frame_interval, num_clips);
clip_info.push_back({clip_len, num_clips});
mmdeploy_video_recognizer_t recognizer{};
int status{};
status = mmdeploy_video_recognizer_create_by_path(model_path, device_name, 0, &recognizer);
if (status != MMDEPLOY_SUCCESS) {
fprintf(stderr, "failed to create recognizer, code: %d\n", (int)status);
return 1;
}
mmdeploy_video_recognition_t* res{};
int* res_count{};
status = mmdeploy_video_recognizer_apply(recognizer, clips.data(), clip_info.data(), 1, &res,
&res_count);
if (status != MMDEPLOY_SUCCESS) {
fprintf(stderr, "failed to apply classifier, code: %d\n", (int)status);
return 1;
}
for (int i = 0; i < res_count[0]; ++i) {
fprintf(stderr, "label: %d, score: %.4f\n", res[i].label_id, res[i].score);
}
mmdeploy_video_recognizer_release_result(res, res_count, 1);
mmdeploy_video_recognizer_destroy(recognizer);
return 0;
}
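// Example invocation (illustrative paths), matching the usage string printed above;
// clip_len=1, frame_interval=1, num_clips=25 corresponds to the TSN test-time sampling
// used elsewhere in this change:
//   ./video_recognition cpu ./mmdeploy_models/mmaction/tsn/ort ./arm_wrestling.mp4 1 1 25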

View File

@ -0,0 +1,90 @@
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <map>
#include <string>
#include <vector>
#include "mmdeploy/video_recognizer.hpp"
#include "opencv2/imgcodecs/imgcodecs.hpp"
#include "opencv2/videoio.hpp"
void SampleFrames(const char* video_path, std::map<int, cv::Mat>& buffer,
std::vector<mmdeploy::Mat>& clips, int clip_len, int frame_interval = 1,
int num_clips = 1) {
cv::VideoCapture cap = cv::VideoCapture(video_path);
if (!cap.isOpened()) {
fprintf(stderr, "failed to load video: %s\n", video_path);
exit(1);
}
int num_frames = cap.get(cv::CAP_PROP_FRAME_COUNT);
printf("num_frames %d\n", num_frames);
int ori_clip_len = clip_len * frame_interval;
float avg_interval = (num_frames - ori_clip_len + 1.f) / num_clips;
std::vector<int> frame_inds;
for (int i = 0; i < num_clips; i++) {
int clip_offset = i * avg_interval + avg_interval / 2.0;
for (int j = 0; j < clip_len; j++) {
int ind = (j * frame_interval + clip_offset) % num_frames;
if (num_frames <= ori_clip_len - 1) {
ind = j % num_frames;
}
frame_inds.push_back(ind);
}
}
std::vector<int> unique_inds(frame_inds.begin(), frame_inds.end());
std::sort(unique_inds.begin(), unique_inds.end());
auto last = std::unique(unique_inds.begin(), unique_inds.end());
unique_inds.erase(last, unique_inds.end());
int ind = 0;
for (int i = 0; i < unique_inds.size(); i++) {
int tid = unique_inds[i];
cv::Mat frame;
while (ind < tid) {
cap.read(frame);
ind++;
}
cap.read(frame);
buffer[tid] = frame;
ind++;
}
clips.resize(frame_inds.size());
for (int i = 0; i < frame_inds.size(); i++) {
auto& img = buffer[frame_inds[i]];
clips[i] = img;
}
}
int main(int argc, char* argv[]) {
if (argc != 7) {
fprintf(stderr,
"usage:\n video_cls device_name model_path video_path video_path clip_len "
"frame_interval num_clips\n");
return 1;
}
auto device_name = argv[1];
auto model_path = argv[2];
auto video_path = argv[3];
int clip_len = std::stoi(argv[4]);
int frame_interval = std::stoi(argv[5]);
int num_clips = std::stoi(argv[6]);
std::map<int, cv::Mat> buffer;
std::vector<mmdeploy::Mat> clips;
mmdeploy::VideoSampleInfo clip_info = {clip_len, num_clips};
SampleFrames(video_path, buffer, clips, clip_len, frame_interval, num_clips);
mmdeploy::Model model(model_path);
mmdeploy::VideoRecognizer recognizer(model, mmdeploy::Device{device_name, 0});
auto res = recognizer.Apply(clips, clip_info);
for (const auto& cls : res) {
fprintf(stderr, "label: %d, score: %.4f\n", cls.label_id, cls.score);
}
return 0;
}

View File

@ -0,0 +1,79 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import cv2
from mmdeploy_python import VideoRecognizer
def parse_args():
parser = argparse.ArgumentParser(
description='show how to use sdk python api')
parser.add_argument('device_name', help='name of device, cuda or cpu')
parser.add_argument(
'model_path',
help='path of mmdeploy SDK model dumped by model converter')
parser.add_argument('video_path', help='path of a video')
parser.add_argument(
'--clip_len',
type=int,
help='Frames of each sampled output clip',
default=1)
parser.add_argument(
'--frame_interval',
type=int,
help='Temporal interval of adjacent sampled frames.',
default=1)
parser.add_argument(
'--num_clips',
type=int,
help='Number of clips to be sampled',
default=25)
args = parser.parse_args()
return args
def SampleFrames(cap, clip_len, frame_interval, num_clips):
if not cap.isOpened():
print('failed to load video')
exit(-1)
num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
ori_clip_len = clip_len * frame_interval
avg_interval = (num_frames - ori_clip_len + 1) / float(num_clips)
frame_inds = []
for i in range(num_clips):
clip_offset = int(i * avg_interval + avg_interval / 2.0)
for j in range(clip_len):
ind = (j * frame_interval + clip_offset) % num_frames
if num_frames <= ori_clip_len - 1:
ind = j % num_frames
frame_inds.append(ind)
unique_inds = sorted(list(set(frame_inds)))
buffer = {}
ind = 0
for i, tid in enumerate(unique_inds):
while ind < tid:
_, mat = cap.read()
ind += 1
_, mat = cap.read()
buffer[tid] = mat
ind += 1
clips = []
for tid in frame_inds:
clips.append(buffer[tid])
info = (clip_len, num_clips)
return clips, info
def main():
args = parse_args()
cap = cv2.VideoCapture(args.video_path)
recognizer = VideoRecognizer(
model_path=args.model_path, device_name=args.device_name, device_id=0)
clips, info = SampleFrames(cap, args.clip_len, args.frame_interval,
args.num_clips)
result = recognizer(clips, info)
for label_id, score in result:
print(label_id, score)
if __name__ == '__main__':
main()
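# Example invocation (illustrative paths); --clip_len/--frame_interval/--num_clips default
# to 1/1/25, the TSN test-time sampling used in the docs of this change:
#   python video_recognition.py cpu ./mmdeploy_models/mmaction/tsn/ort ./arm_wrestling.mp4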

View File

@ -1812,6 +1812,76 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../
</table>
</div>
<div style="margin-left: 25px;">
<table class="docutils">
<thead>
<tr>
<th align="center" colspan="4">mmaction2</th>
<th align="center">Pytorch</th>
<th align="center">ONNXRuntime</th>
<th align="center" colspan="2">TensorRT</th>
<th align="center">PPLNN</th>
<th align="center">OpenVINO</th>
</tr>
</thead>
<tbody>
<tr>
<td align="center">model</td>
<td align="center">task</td>
<td align="center">dataset</td>
<td align="center">metrics</td>
<td align="center">fp32</td>
<td align="center">fp32</td>
<td align="center">fp32</td>
<td align="center">fp16</td>
<td align="center">fp16</td>
<td align="center">fp32</td>
</tr>
<tr>
<td align="center" rowspan="2"><a href="https://github.com/open-mmlab/mmaction2/blob/dev-1.x/configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py">TSN</a></td>
<td align="center" rowspan="2">Recognition</td>
<td align="center" rowspan="2">Kinetics-400</td>
<td align="center">top-1</td>
<td align="center">69.71</td>
<td align="center">-</td>
<td align="center">69.71</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="center">top-5</td>
<td align="center">88.75</td>
<td align="center">-</td>
<td align="center">88.75</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="center" rowspan="2"><a href="https://github.com/open-mmlab/mmaction2/blob/dev-1.x/configs/recognition/slowfast/slowfast_r50_8xb8-4x16x1-256e_kinetics400-rgb.py">SlowFast</a></td>
<td align="center" rowspan="2">Recognition</td>
<td align="center" rowspan="2">Kinetics-400</td>
<td align="center">top-1</td>
<td align="center">74.45</td>
<td align="center">-</td>
<td align="center">75.62</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="center">top-5</td>
<td align="center">91.55</td>
<td align="center">-</td>
<td align="center">92.10</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
</tbody>
</table>
</div>
## Notes
- Some codebases, such as MMDet, contain datasets with images of various resolutions. For these, the speed benchmark is obtained with static configs in MMDeploy, while the performance benchmark is obtained with dynamic ones.

View File

@ -0,0 +1,190 @@
# MMAction2 Deployment
- [MMAction2 Deployment](#mmaction2-deployment)
- [Installation](#installation)
- [Install mmaction2](#install-mmaction2)
- [Install mmdeploy](#install-mmdeploy)
- [Convert model](#convert-model)
- [Convert video recognition model](#convert-video-recognition-model)
- [Model specification](#model-specification)
- [Model Inference](#model-inference)
- [Backend model inference](#backend-model-inference)
- [SDK model inference](#sdk-model-inference)
- [Video recognition SDK model inference](#video-recognition-sdk-model-inference)
- [Supported models](#supported-models)
______________________________________________________________________
[MMAction2](https://github.com/open-mmlab/mmaction2) is an open-source toolbox for video understanding based on PyTorch. It is a part of the [OpenMMLab](https://openmmlab.com) project.
## Installation
### Install mmaction2
Please follow the [installation guide](https://github.com/open-mmlab/mmaction2/tree/dev-1.x#installation) to install mmaction2.
### Install mmdeploy
There are several methods to install mmdeploy, among which you can choose an appropriate one according to your target platform and device.
**Method I:** Install precompiled package
You can download the latest release package from [here](https://github.com/open-mmlab/mmdeploy/releases)
**Method II:** Build using scripts
If your target platform is **Ubuntu 18.04 or a later version**, we encourage you to run the
[scripts](../01-how-to-build/build_from_script.md). For example, the following commands install mmdeploy together with the `ONNX Runtime` inference engine.
```shell
git clone --recursive -b dev-1.x https://github.com/open-mmlab/mmdeploy.git
cd mmdeploy
python3 tools/scripts/build_ubuntu_x64_ort.py $(nproc)
export PYTHONPATH=$(pwd)/build/lib:$PYTHONPATH
export LD_LIBRARY_PATH=$(pwd)/../mmdeploy-dep/onnxruntime-linux-x64-1.8.1/lib/:$LD_LIBRARY_PATH
```
**Method III:** Build from source
If neither **I** nor **II** meets your requirements, [building mmdeploy from source](../01-how-to-build/build_from_source.md) is the last option.
## Convert model
You can use [tools/deploy.py](https://github.com/open-mmlab/mmdeploy/blob/dev-1.x/tools/deploy.py) to convert mmaction2 models to the specified backend models. Its detailed usage can be learned from [here](https://github.com/open-mmlab/mmdeploy/blob/master/docs/en/02-how-to-run/convert_model.md#usage).
When using `tools/deploy.py`, it is crucial to specify the correct deployment config. We've already provided builtin deployment config [files](https://github.com/open-mmlab/mmdeploy/tree/dev-1.x/configs/mmaction) of all supported backends for mmaction2, under which the config file path follows the pattern:
```
{task}/{task}_{backend}-{precision}_{static | dynamic}_{shape}.py
```
where (a concrete example follows this list):
- **{task}:** task in mmaction2.
- **{backend}:** inference backend, such as onnxruntime, tensorrt, pplnn, ncnn, openvino, coreml etc.
- **{precision}:** fp16, int8. When it's empty, it means fp32
- **{static | dynamic}:** static shape or dynamic shape
- **{shape}:** input shape or shape range of a model
- **{2d/3d}:** model type
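For example, the config used later in this guide combines these fields: the `video-recognition` task, a `2d` model, the `onnxruntime` backend, fp32 precision (empty field) and a static shape:
```
video-recognition/video-recognition_2d_onnxruntime_static.py
```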
In the next part, we will take the `tsn` model from the `video recognition` task as an example, showing how to convert it to an ONNX model that can be inferred by ONNX Runtime.
### Convert video recognition model
```shell
cd mmdeploy
# download tsn model from mmaction2 model zoo
mim download mmaction2 --config tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb --dest .
# convert mmaction2 model to onnxruntime model with static shape
python tools/deploy.py \
configs/mmaction/video-recognition/video-recognition_2d_onnxruntime_static.py \
tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb \
tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20220906-cd10898e.pth \
tests/data/arm_wrestling.mp4 \
--work-dir mmdeploy_models/mmaction/tsn/ort \
--device cpu \
--show \
--dump-info
```
## Model specification
Before moving on to the model inference chapter, let's take a closer look at the structure of the converted model, which is essential for model inference.
The converted model is located in the working directory, which is `mmdeploy_models/mmaction/tsn/ort` in the previous example. It includes:
```
mmdeploy_models/mmaction/tsn/ort
├── deploy.json
├── detail.json
├── end2end.onnx
└── pipeline.json
```
in which,
- **end2end.onnx**: backend model which can be inferred by ONNX Runtime
- \***.json**: the necessary information for mmdeploy SDK
The whole package **mmdeploy_models/mmaction/tsn/ort** is defined as the **mmdeploy SDK model**, i.e., the **mmdeploy SDK model** includes both the backend model and the inference meta information.
## Model Inference
### Backend model inference
Taking the previously converted `end2end.onnx` model of `tsn` as an example, you can use the following code to run inference and get the prediction results.
```python
from mmdeploy.apis.utils import build_task_processor
from mmdeploy.utils import get_input_shape, load_config
import numpy as np
import torch
deploy_cfg = 'configs/mmaction/video-recognition/video-recognition_2d_onnxruntime_static.py'
model_cfg = 'tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb'
device = 'cpu'
backend_model = ['./mmdeploy_models/mmaction/tsn/ort/end2end.onnx']
image = 'tests/data/arm_wrestling.mp4'
# read deploy_cfg and model_cfg
deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg)
# build task and backend model
task_processor = build_task_processor(model_cfg, deploy_cfg, device)
model = task_processor.build_backend_model(backend_model)
# process input image
input_shape = get_input_shape(deploy_cfg)
model_inputs, _ = task_processor.create_input(image, input_shape)
# do model inference
with torch.no_grad():
result = model.test_step(model_inputs)
# show top-5 results
pred_scores = result[0].pred_scores.item.tolist()
top_index = np.argsort(pred_scores)[::-1]
for i in range(5):
index = top_index[i]
print(index, pred_scores[index])
```
### SDK model inference
Given the above SDK model of `tsn`, you can also perform SDK model inference as follows.
#### Video recognition SDK model inference
```python
from mmdeploy_python import VideoRecognizer
import cv2
# refer to demo/python/video_recognition.py
# def SampleFrames(cap, clip_len, frame_interval, num_clips):
# ...
cap = cv2.VideoCapture('tests/data/arm_wrestling.mp4')
clips, info = SampleFrames(cap, 1, 1, 25)
# create a recognizer
recognizer = VideoRecognizer(model_path='./mmdeploy_models/mmaction/tsn/ort', device_name='cpu', device_id=0)
# perform inference
result = recognizer(clips, info)
# show inference result
for label_id, score in result:
print(label_id, score)
```
Besides the Python API, mmdeploy SDK also provides FFIs (Foreign Function Interfaces) for other languages such as C, C++, C#, Java and so on. You can learn their usage from the [demos](https://github.com/open-mmlab/mmdeploy/tree/dev-1.x/demo).
> MMAction2 only supports the C, C++ and Python APIs for now.
## Supported models
| Model | TorchScript | ONNX Runtime | TensorRT | ncnn | PPLNN | OpenVINO |
| :-------------------------------------------------------------------------------------------- | :---------: | :----------: | :------: | :--: | :---: | :------: |
| [TSN](https://github.com/open-mmlab/mmaction2/tree/dev-1.x/configs/recognition/tsn) | N | Y | Y | N | N | N |
| [SlowFast](https://github.com/open-mmlab/mmaction2/tree/dev-1.x/configs/recognition/slowfast) | N | Y | Y | N | N | N |

View File

@ -1807,6 +1807,76 @@ GPU: ncnn, TensorRT, PPLNN
</table>
</div>
<div style="margin-left: 25px;">
<table class="docutils">
<thead>
<tr>
<th align="center" colspan="4">mmaction2</th>
<th align="center">Pytorch</th>
<th align="center">ONNXRuntime</th>
<th align="center" colspan="2">TensorRT</th>
<th align="center">PPLNN</th>
<th align="center">OpenVINO</th>
</tr>
</thead>
<tbody>
<tr>
<td align="center">model</td>
<td align="center">task</td>
<td align="center">dataset</td>
<td align="center">metrics</td>
<td align="center">fp32</td>
<td align="center">fp32</td>
<td align="center">fp32</td>
<td align="center">fp16</td>
<td align="center">fp16</td>
<td align="center">fp32</td>
</tr>
<tr>
<td align="center" rowspan="2"><a href="https://github.com/open-mmlab/mmaction2/blob/dev-1.x/configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py">TSN</a></td>
<td align="center" rowspan="2">Recognition</td>
<td align="center" rowspan="2">Kinetics-400</td>
<td align="center">top-1</td>
<td align="center">69.71</td>
<td align="center">-</td>
<td align="center">69.71</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="center">top-5</td>
<td align="center">88.75</td>
<td align="center">-</td>
<td align="center">88.75</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="center" rowspan="2"><a href="https://github.com/open-mmlab/mmaction2/blob/dev-1.x/configs/recognition/slowfast/slowfast_r50_8xb8-4x16x1-256e_kinetics400-rgb.py">SlowFast</a></td>
<td align="center" rowspan="2">Recognition</td>
<td align="center" rowspan="2">Kinetics-400</td>
<td align="center">top-1</td>
<td align="center">74.45</td>
<td align="center">-</td>
<td align="center">75.62</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="center">top-5</td>
<td align="center">91.55</td>
<td align="center">-</td>
<td align="center">92.10</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
</tbody>
</table>
</div>
## Notes
- Some codebases, such as MMDet, contain datasets with images of various resolutions. For these, the speed benchmark is obtained with static configs in MMDeploy, while the performance benchmark is obtained with dynamic ones.

View File

@ -0,0 +1,193 @@
# MMAction2 Deployment
- [MMAction2 Deployment](#mmaction2-deployment)
- [Installation](#installation)
- [Install mmaction2](#install-mmaction2)
- [Install mmdeploy](#install-mmdeploy)
- [Convert model](#convert-model)
- [Convert video recognition model](#convert-video-recognition-model)
- [Model specification](#model-specification)
- [Model inference](#model-inference)
- [Backend model inference](#backend-model-inference)
- [SDK model inference](#sdk-model-inference)
- [Video recognition SDK model inference](#video-recognition-sdk-model-inference)
- [Supported models](#supported-models)
______________________________________________________________________
[MMAction2](https://github.com/open-mmlab/mmaction2) is an open-source toolbox for video understanding based on PyTorch. It is a part of the [OpenMMLab](https://openmmlab.com) project.
## Installation
### Install mmaction2
Please refer to the [official installation guide](https://github.com/open-mmlab/mmaction2/tree/dev-1.x#installation).
### Install mmdeploy
mmdeploy can be installed in several ways:
**Method I:** Install the precompiled package
You can get the latest precompiled package from [this link](https://github.com/open-mmlab/mmdeploy/releases).
**Method II:** Build using scripts
If your deployment platform is **Ubuntu 18.04 or a later version**, please follow the [script installation instructions](../01-how-to-build/build_from_script.md) to complete the installation.
For example, the following commands install mmdeploy together with the `ONNX Runtime` inference engine.
```shell
git clone --recursive -b dev-1.x https://github.com/open-mmlab/mmdeploy.git
cd mmdeploy
python3 tools/scripts/build_ubuntu_x64_ort.py $(nproc)
export PYTHONPATH=$(pwd)/build/lib:$PYTHONPATH
export LD_LIBRARY_PATH=$(pwd)/../mmdeploy-dep/onnxruntime-linux-x64-1.8.1/lib/:$LD_LIBRARY_PATH
```
**Method III:** Build from source
If neither Method I nor Method II meets your requirements, please follow the [build-from-source instructions](../01-how-to-build/build_from_source.md) to install mmdeploy and the required inference engines.
## Convert model
You can use [tools/deploy.py](https://github.com/open-mmlab/mmdeploy/blob/dev-1.x/tools/deploy.py) to convert mmaction2 models to backend models in one step.
Its detailed usage can be found [here](https://github.com/open-mmlab/mmdeploy/blob/master/docs/en/02-how-to-run/convert_model.md#usage).
One key to the conversion is using the correct config file. The project already provides builtin deployment [config files](https://github.com/open-mmlab/mmdeploy/tree/dev-1.x/configs/mmaction) for each backend.
They are named following the pattern:
```
{task}/{task}_{backend}-{precision}_{static | dynamic}_{shape}.py
```
where:
- **{task}:** task in mmaction2
- **{backend}:** name of the inference backend, e.g. onnxruntime, tensorrt, pplnn, ncnn, openvino, coreml, etc.
- **{precision}:** inference precision, e.g. fp16, int8; empty means fp32
- **{static | dynamic}:** static or dynamic shape
- **{shape}:** input shape or shape range of the model
- **{2d/3d}:** model type
In the following, we will demonstrate how to convert the `tsn` model from the video recognition task into an ONNX model.
### Convert video recognition model
```shell
cd mmdeploy
# download tsn model from mmaction2 model zoo
mim download mmaction2 --config tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb --dest .
# convert mmaction2 model to onnxruntime model with static shape
python tools/deploy.py \
configs/mmaction/video-recognition/video-recognition_2d_onnxruntime_static.py \
tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb \
tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20220906-cd10898e.pth \
tests/data/arm_wrestling.mp4 \
--work-dir mmdeploy_models/mmaction/tsn/ort \
--device cpu \
--show \
--dump-info
```
## Model specification
Before running inference with the converted model, it is necessary to understand the structure of the conversion result. It is stored in the directory specified by `--work-dir`.
In the previous example this is `mmdeploy_models/mmaction/tsn/ort`, structured as follows:
```
mmdeploy_models/mmaction/tsn/ort
├── deploy.json
├── detail.json
├── end2end.onnx
└── pipeline.json
```
The important parts are:
- **end2end.onnx**: the inference engine file, which can be inferred by ONNX Runtime
- \***.json**: the meta information required by mmdeploy SDK inference
The whole folder is defined as the **mmdeploy SDK model**. In other words, the **mmdeploy SDK model** includes both the inference engine and the inference meta information.
## Model inference
### Backend model inference
Taking the converted `end2end.onnx` above as an example, you can run inference with the following code:
```python
from mmdeploy.apis.utils import build_task_processor
from mmdeploy.utils import get_input_shape, load_config
import numpy as np
import torch
deploy_cfg = 'configs/mmaction/video-recognition/video-recognition_2d_onnxruntime_static.py'
model_cfg = 'tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb'
device = 'cpu'
backend_model = ['./mmdeploy_models/mmaction/tsn/ort/end2end.onnx']
image = 'tests/data/arm_wrestling.mp4'
# read deploy_cfg and model_cfg
deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg)
# build task and backend model
task_processor = build_task_processor(model_cfg, deploy_cfg, device)
model = task_processor.build_backend_model(backend_model)
# process input image
input_shape = get_input_shape(deploy_cfg)
model_inputs, _ = task_processor.create_input(image, input_shape)
# do model inference
with torch.no_grad():
result = model.test_step(model_inputs)
# show top-5 results
pred_scores = result[0].pred_scores.item.tolist()
top_index = np.argsort(pred_scores)[::-1]
for i in range(5):
index = top_index[i]
print(index, pred_scores[index])
```
### SDK model inference
You can also refer to the following code to run inference with the SDK model:
#### Video recognition SDK model inference
```python
from mmdeploy_python import VideoRecognizer
import cv2
# refer to demo/python/video_recognition.py
# def SampleFrames(cap, clip_len, frame_interval, num_clips):
# ...
cap = cv2.VideoCapture('tests/data/arm_wrestling.mp4')
clips, info = SampleFrames(cap, 1, 1, 25)
# create a recognizer
recognizer = VideoRecognizer(model_path='./mmdeploy_models/mmaction/tsn/ort', device_name='cpu', device_id=0)
# perform inference
result = recognizer(clips, info)
# show inference result
for label_id, score in result:
print(label_id, score)
```
Besides the Python API, mmdeploy SDK also provides multi-language interfaces such as C, C++, C# and Java.
You can refer to the [demos](https://github.com/open-mmlab/mmdeploy/tree/dev-1.x/demo) to learn how to use the interfaces of the other languages.
> The C# and Java interfaces for mmaction2 are still to be developed.
## Supported models
| Model | TorchScript | ONNX Runtime | TensorRT | ncnn | PPLNN | OpenVINO |
| :-------------------------------------------------------------------------------------------- | :---------: | :----------: | :------: | :--: | :---: | :------: |
| [TSN](https://github.com/open-mmlab/mmaction2/tree/dev-1.x/configs/recognition/tsn) | N | Y | Y | N | N | N |
| [SlowFast](https://github.com/open-mmlab/mmaction2/tree/dev-1.x/configs/recognition/slowfast) | N | Y | Y | N | N | N |

View File

@ -164,8 +164,9 @@ def get_preprocess(deploy_cfg: mmengine.Config, model_cfg: mmengine.Config,
for transform in transforms:
if transform['type'] == 'Normalize':
transform['to_float'] = False
assert transforms[0]['type'] == 'LoadImageFromFile', 'The first item'\
' type of pipeline should be LoadImageFromFile'
if transforms[0]['type'] != 'Lift':
assert transforms[0]['type'] == 'LoadImageFromFile', \
'The first item type of pipeline should be LoadImageFromFile'
return dict(
type='Task',
module='Transform',
@ -244,7 +245,8 @@ def get_pipeline(deploy_cfg: mmengine.Config, model_cfg: mmengine.Config,
task = get_task_type(deploy_cfg)
input_names = preprocess['input']
output_names = postprocess['output']
if task == Task.CLASSIFICATION or task == Task.SUPER_RESOLUTION:
if task == Task.CLASSIFICATION or task == Task.SUPER_RESOLUTION \
or task == Task.VIDEO_RECOGNITION:
postprocess['input'] = infer_info['output']
else:
postprocess['input'] = preprocess['output'] + infer_info['output']

View File

@ -111,11 +111,16 @@ class VideoRecognition(BaseTask):
nn.Module: An initialized backend model.
"""
from .video_recognition_model import build_video_recognition_model
data_preprocessor = self.model_cfg.model.data_preprocessor
data_preprocessor.setdefault('type', 'mmaction.ActionDataPreprocessor')
model = build_video_recognition_model(
model_files, self.model_cfg, self.deploy_cfg, device=self.device)
model_files,
self.model_cfg,
self.deploy_cfg,
device=self.device,
data_preprocessor=data_preprocessor)
model.to(self.device)
model.eval()
return model
return model.eval()
def create_input(self,
imgs: Union[str, np.ndarray],
@ -242,7 +247,7 @@ class VideoRecognition(BaseTask):
"""
return input_data['inputs']
def get_preprocess(self) -> Dict:
def get_preprocess(self, *args, **kwargs) -> Dict:
"""Get the preprocess information for SDK.
Return:
@ -250,19 +255,70 @@ class VideoRecognition(BaseTask):
"""
input_shape = get_input_shape(self.deploy_cfg)
model_cfg = process_model_config(self.model_cfg, [''], input_shape)
preprocess = model_cfg.test_pipeline
return preprocess
pipeline = model_cfg.test_pipeline
data_preprocessor = self.model_cfg.model.data_preprocessor
def get_postprocess(self) -> Dict:
lift = dict(type='Lift', transforms=[])
lift['transforms'].append(dict(type='LoadImageFromFile'))
transforms2index = {}
for i, trans in enumerate(pipeline):
transforms2index[trans['type']] = i
lift_key = [
'Resize', 'Normalize', 'TenCrop', 'ThreeCrop', 'CenterCrop'
]
for key in lift_key:
if key == 'Normalize':
assert key not in transforms2index
mean = data_preprocessor.get('mean', [0, 0, 0])
std = data_preprocessor.get('std', [1, 1, 1])
trans = dict(type='Normalize', mean=mean, std=std, to_rgb=True)
lift['transforms'].append(trans)
if key in transforms2index:
index = transforms2index[key]
if key == 'Resize' and 'scale' in pipeline[index]:
value = pipeline[index].pop('scale')
if len(value) == 2 and value[0] == -1:
value = value[::-1]
pipeline[index]['size'] = value
lift['transforms'].append(pipeline[index])
meta_keys = [
'valid_ratio', 'flip', 'img_norm_cfg', 'filename', 'ori_shape',
'pad_shape', 'img_shape', 'flip_direction', 'scale_factor',
'ori_filename'
]
other = []
must_key = ['FormatShape', 'PackActionInputs']
for key in must_key:
assert key in transforms2index
index = transforms2index[key]
if key == 'PackActionInputs':
if 'meta_keys' in pipeline[index]:
meta_keys += pipeline[index]['meta_keys']
pipeline[index]['meta_keys'] = list(set(meta_keys))
pipeline[index]['keys'] = ['img']
pipeline[index]['type'] = 'Collect'
other.append(pipeline[index])
reorder = [lift, *other]
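# The reordered preprocess list therefore looks roughly like the following
# (illustrative, for a TSN-style test pipeline with Resize and CenterCrop):
#   [dict(type='Lift', transforms=[LoadImageFromFile, Resize, Normalize, CenterCrop]),
#    dict(type='FormatShape', ...),
#    dict(type='Collect', keys=['img'], meta_keys=[...])]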
return reorder
def get_postprocess(self, *args, **kwargs) -> Dict:
"""Get the postprocess information for SDK.
Return:
dict: Composed of the postprocess information.
"""
postprocess = self.model_cfg.model.cls_head
assert 'cls_head' in self.model_cfg.model
assert 'num_classes' in self.model_cfg.model.cls_head
logger = get_root_logger()
logger.warning('use default top-k value 1')
num_classes = self.model_cfg.model.cls_head.num_classes
params = dict(topk=1, num_classes=num_classes)
postprocess = dict(type='BaseHead', params=params)
return postprocess
def get_model_name(self) -> str:
def get_model_name(self, *args, **kwargs) -> str:
"""Get the model name.
Return:

View File

@ -9,6 +9,7 @@ from mmengine import Config
from mmengine.model import BaseDataPreprocessor
from mmengine.registry import Registry
from mmengine.structures import BaseDataElement, LabelData
from torch import nn
from mmdeploy.codebase.base import BaseBackendModel
from mmdeploy.utils import (Backend, get_backend, get_codebase_config,
@ -37,19 +38,11 @@ class End2EndModel(BaseBackendModel):
backend_files: Sequence[str],
device: str,
deploy_cfg: Union[str, Config] = None,
model_cfg: Union[str, Config] = None,
data_preprocessor: Optional[Union[dict, nn.Module]] = None,
**kwargs):
super(End2EndModel, self).__init__(deploy_cfg=deploy_cfg)
model_cfg, deploy_cfg = load_config(model_cfg, deploy_cfg)
from mmaction.registry import MODELS
preprocessor_cfg = model_cfg.model.get('data_preprocessor', None)
if preprocessor_cfg is not None:
self.data_preprocessor = MODELS.build(
model_cfg.model.data_preprocessor)
else:
self.data_preprocessor = BaseDataPreprocessor()
super(End2EndModel, self).__init__(
deploy_cfg=deploy_cfg, data_preprocessor=data_preprocessor)
self.deploy_cfg = deploy_cfg
self.model_cfg = model_cfg
self._init_wrapper(
backend=backend,
backend_files=backend_files,
@ -114,10 +107,14 @@ class End2EndModel(BaseBackendModel):
return data_samples
def build_video_recognition_model(model_files: Sequence[str],
model_cfg: Union[str, mmengine.Config],
deploy_cfg: Union[str, mmengine.Config],
device: str, **kwargs):
def build_video_recognition_model(
model_files: Sequence[str],
model_cfg: Union[str, mmengine.Config],
deploy_cfg: Union[str, mmengine.Config],
device: str,
data_preprocessor: Optional[Union[Config,
BaseDataPreprocessor]] = None,
**kwargs):
"""Build video recognition model for different backends.
Args:
@ -127,6 +124,8 @@ def build_video_recognition_model(model_files: Sequence[str],
deploy_cfg (str | mmengine.Config): Input deployment config file or
Config object.
device (str): Device to input model.
data_preprocessor (BaseDataPreprocessor | Config): The data
preprocessor of the model.
Returns:
BaseBackendModel: Video recognizer for a configured backend.
@ -144,7 +143,7 @@ def build_video_recognition_model(model_files: Sequence[str],
backend_files=model_files,
device=device,
deploy_cfg=deploy_cfg,
model_cfg=model_cfg,
data_preprocessor=data_preprocessor,
**kwargs))
return backend_video_recognizer

View File

@ -85,7 +85,9 @@ SDK_TASK_MAP = {
Task.POSE_DETECTION:
dict(component='Detector', cls_name='PoseDetector'),
Task.ROTATED_DETECTION:
dict(component='ResizeRBBox', cls_name='RotatedDetector')
dict(component='ResizeRBBox', cls_name='RotatedDetector'),
Task.VIDEO_RECOGNITION:
dict(component='BaseHead', cls_name='VideoRecognizer')
}
TENSORRT_MAX_TOPK = 3840

Binary file not shown.

View File

@ -771,7 +771,7 @@ def get_backend_result(pipeline_info: dict, model_cfg_path: Path,
if sdk_config is not None:
if codebase_name == 'mmcls':
if codebase_name == 'mmcls' or codebase_name == 'mmaction':
replace_top_in_pipeline_json(backend_output_path, logger)
log_path = gen_log_path(