[Feature] Sync mmaction2-sdk(master) to dev1.x (#1307)
* sync mmaction sdk
* pipeline.json
* fix docs
* replace topk when making regression for mmaction2
* add python api
* add missing file
* add missing test file
* remove cudnn dep for formatshape
* add sample arg for input
parent 180500d76d
commit ccc21289d1
csrc/mmdeploy
    apis
        c/mmdeploy
        cxx/mmdeploy
        python
demo
docs
    en
        03-benchmark
        04-supported-codebases
    zh_cn
        03-benchmark
        04-supported-codebases
mmdeploy
    backend/sdk
    codebase/mmaction/deploy
    utils
tests/data
tools
@@ -0,0 +1,190 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include "video_recognizer.h"

#include <numeric>
#include <vector>

#include "common_internal.h"
#include "executor_internal.h"
#include "mmdeploy/archive/value_archive.h"
#include "mmdeploy/codebase/mmaction/mmaction.h"
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/mat.h"
#include "mmdeploy/core/model.h"
#include "mmdeploy/core/status_code.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/core/value.h"
#include "model.h"
#include "pipeline.h"

using namespace mmdeploy;

namespace {

Value config_template(const Model& model) {
  // clang-format off
  return {
    {"type", "Pipeline"},
    {"input", {"video"}},
    {
      "tasks", {
        {
          {"name", "Video Recognizer"},
          {"type", "Inference"},
          {"input", "video"},
          {"output", "label"},
          {"params", {{"model", std::move(model)}}},
        }
      }
    },
    {"output", "label"},
  };
  // clang-format on
}

}  // namespace

int mmdeploy_video_recognizer_create(mmdeploy_model_t model, const char* device_name, int device_id,
                                     mmdeploy_video_recognizer_t* recognizer) {
  mmdeploy_context_t context{};
  auto ec = mmdeploy_context_create_by_device(device_name, device_id, &context);
  if (ec != MMDEPLOY_SUCCESS) {
    return ec;
  }
  ec = mmdeploy_video_recognizer_create_v2(model, context, recognizer);
  mmdeploy_context_destroy(context);
  return ec;
}

int mmdeploy_video_recognizer_create_by_path(const char* model_path, const char* device_name,
                                             int device_id,
                                             mmdeploy_video_recognizer_t* recognizer) {
  mmdeploy_model_t model{};

  if (auto ec = mmdeploy_model_create_by_path(model_path, &model)) {
    return ec;
  }
  auto ec = mmdeploy_video_recognizer_create(model, device_name, device_id, recognizer);
  mmdeploy_model_destroy(model);
  return ec;
}

int mmdeploy_video_recognizer_apply(mmdeploy_video_recognizer_t recognizer,
                                    const mmdeploy_mat_t* images,
                                    const mmdeploy_video_sample_info_t* video_info, int video_count,
                                    mmdeploy_video_recognition_t** results, int** result_count) {
  wrapped<mmdeploy_value_t> input;
  if (auto ec =
          mmdeploy_video_recognizer_create_input(images, video_info, video_count, input.ptr())) {
    return ec;
  }

  wrapped<mmdeploy_value_t> output;
  if (auto ec = mmdeploy_video_recognizer_apply_v2(recognizer, input, output.ptr())) {
    return ec;
  }

  if (auto ec = mmdeploy_video_recognizer_get_result(output, results, result_count)) {
    return ec;
  }
  return MMDEPLOY_SUCCESS;
}

void mmdeploy_video_recognizer_release_result(mmdeploy_video_recognition_t* results,
                                              int* result_count, int video_count) {
  delete[] results;
  delete[] result_count;
}

void mmdeploy_video_recognizer_destroy(mmdeploy_video_recognizer_t recognizer) {
  mmdeploy_pipeline_destroy((mmdeploy_pipeline_t)recognizer);
}

int mmdeploy_video_recognizer_create_v2(mmdeploy_model_t model, mmdeploy_context_t context,
                                        mmdeploy_video_recognizer_t* recognizer) {
  auto config = config_template(*Cast(model));
  return mmdeploy_pipeline_create_v3(Cast(&config), context, (mmdeploy_pipeline_t*)recognizer);
}

int mmdeploy_video_recognizer_create_input(const mmdeploy_mat_t* images,
                                           const mmdeploy_video_sample_info_t* video_info,
                                           int video_count, mmdeploy_value_t* value) {
  if (video_count && (images == nullptr || video_info == nullptr)) {
    return MMDEPLOY_E_INVALID_ARG;
  }
  try {
    auto input = std::make_unique<Value>(Value{Value::kArray});
    // walk a running cursor over `images` so that each video picks up its own
    // frames instead of re-reading the first video's
    const mmdeploy_mat_t* mat = images;
    for (int i = 0; i < video_count; ++i) {
      // one fresh sample per video; reusing a single moved-from sample across
      // iterations would break for video_count > 1
      Value sample(Value::kArray);
      int clip_len = video_info[i].clip_len;
      int num_clips = video_info[i].num_clips;
      int n_mat = clip_len * num_clips;
      for (int j = 0; j < n_mat; ++j, ++mat) {
        mmdeploy::Mat _mat{mat->height,
                           mat->width,
                           PixelFormat(mat->format),
                           DataType(mat->type),
                           mat->data,
                           mat->device ? *(const Device*)(mat->device) : Device{0}};
        sample.push_back({{"ori_img", _mat}, {"clip_len", clip_len}, {"num_clips", num_clips}});
      }
      input->front().push_back(std::move(sample));
    }
    *value = Cast(input.release());
    return MMDEPLOY_SUCCESS;
  } catch (const std::exception& e) {
    MMDEPLOY_ERROR("unhandled exception: {}", e.what());
  } catch (...) {
    MMDEPLOY_ERROR("unknown exception caught");
  }
  return MMDEPLOY_E_FAIL;
}

int mmdeploy_video_recognizer_apply_v2(mmdeploy_video_recognizer_t recognizer,
                                       mmdeploy_value_t input, mmdeploy_value_t* output) {
  return mmdeploy_pipeline_apply((mmdeploy_pipeline_t)recognizer, input, output);
}

int mmdeploy_video_recognizer_get_result(mmdeploy_value_t output,
                                         mmdeploy_video_recognition_t** results,
                                         int** result_count) {
  if (!output || !results || !result_count) {
    return MMDEPLOY_E_INVALID_ARG;
  }
  try {
    Value& value = Cast(output)->front();

    auto classify_outputs = from_value<std::vector<mmaction::Labels>>(value);

    std::vector<int> _result_count;
    _result_count.reserve(classify_outputs.size());

    for (const auto& cls_output : classify_outputs) {
      _result_count.push_back((int)cls_output.size());
    }

    auto total = std::accumulate(begin(_result_count), end(_result_count), 0);

    std::unique_ptr<int[]> result_count_data(new int[_result_count.size()]{});
    std::copy(_result_count.begin(), _result_count.end(), result_count_data.get());

    std::unique_ptr<mmdeploy_video_recognition_t[]> result_data(
        new mmdeploy_video_recognition_t[total]{});
    auto result_ptr = result_data.get();
    for (const auto& cls_output : classify_outputs) {
      for (const auto& label : cls_output) {
        result_ptr->label_id = label.label_id;
        result_ptr->score = label.score;
        ++result_ptr;
      }
    }

    *result_count = result_count_data.release();
    *results = result_data.release();

    return MMDEPLOY_SUCCESS;
  } catch (const std::exception& e) {
    MMDEPLOY_ERROR("unhandled exception: {}", e.what());
  } catch (...) {
    MMDEPLOY_ERROR("unknown exception caught");
  }
  return MMDEPLOY_E_FAIL;
}
@@ -0,0 +1,139 @@
// Copyright (c) OpenMMLab. All rights reserved.

/**
 * @file video_recognizer.h
 * @brief Interface to MMACTION video recognition task
 */

#ifndef MMDEPLOY_VIDEO_RECOGNIZER_H
#define MMDEPLOY_VIDEO_RECOGNIZER_H

#include "common.h"
#include "executor.h"
#include "model.h"

#ifdef __cplusplus
extern "C" {
#endif

typedef struct mmdeploy_video_recognition_t {
  int label_id;
  float score;
} mmdeploy_video_recognition_t;

typedef struct mmdeploy_video_sample_info_t {
  int clip_len;
  int num_clips;
} mmdeploy_video_sample_info_t;

typedef struct mmdeploy_video_recognizer* mmdeploy_video_recognizer_t;

/**
 * @brief Create video recognizer's handle
 * @param[in] model an instance of mmaction sdk model created by
 * \ref mmdeploy_model_create_by_path or \ref mmdeploy_model_create in \ref model.h
 * @param[in] device_name name of device, such as "cpu", "cuda", etc.
 * @param[in] device_id id of device.
 * @param[out] recognizer handle of the created video recognizer, which must be destroyed
 * by \ref mmdeploy_video_recognizer_destroy
 * @return status of creating video recognizer's handle
 */
MMDEPLOY_API int mmdeploy_video_recognizer_create(mmdeploy_model_t model, const char* device_name,
                                                  int device_id,
                                                  mmdeploy_video_recognizer_t* recognizer);

/**
 * @brief Create a video recognizer instance
 * @param[in] model_path path to a video recognition model
 * @param[in] device_name name of device, such as "cpu", "cuda", etc.
 * @param[in] device_id id of device.
 * @param[out] recognizer handle of the created video recognizer, which must be destroyed
 * by \ref mmdeploy_video_recognizer_destroy
 * @return status code of the operation
 */
MMDEPLOY_API int mmdeploy_video_recognizer_create_by_path(const char* model_path,
                                                          const char* device_name, int device_id,
                                                          mmdeploy_video_recognizer_t* recognizer);

/**
 * @brief Apply video recognizer to a batch of videos
 * @param[in] recognizer video recognizer's handle created by \ref
 * mmdeploy_video_recognizer_create_by_path
 * @param[in] images frames of the videos in the batch
 * @param[in] video_info sampling information of each video
 * @param[in] video_count number of videos
 * @param[out] results a linear buffer containing the recognition results, which must be
 * released by \ref mmdeploy_video_recognizer_release_result
 * @param[out] result_count a linear buffer with length being \p video_count that saves the
 * number of recognition results of each video. It must be released by \ref
 * mmdeploy_video_recognizer_release_result
 * @return status code of the operation
 */
MMDEPLOY_API int mmdeploy_video_recognizer_apply(mmdeploy_video_recognizer_t recognizer,
                                                 const mmdeploy_mat_t* images,
                                                 const mmdeploy_video_sample_info_t* video_info,
                                                 int video_count,
                                                 mmdeploy_video_recognition_t** results,
                                                 int** result_count);

/** @brief Release result buffers returned by \ref mmdeploy_video_recognizer_apply
 * @param[in] results result buffer returned by the video recognizer
 * @param[in] result_count buffer holding the number of results per video
 * @param[in] video_count length of \p result_count
 */
MMDEPLOY_API void mmdeploy_video_recognizer_release_result(mmdeploy_video_recognition_t* results,
                                                           int* result_count, int video_count);

/**
 * @brief Destroy video recognizer
 * @param[in] recognizer handle of a video recognizer created by \ref
 * mmdeploy_video_recognizer_create_by_path or \ref mmdeploy_video_recognizer_create
 */
MMDEPLOY_API void mmdeploy_video_recognizer_destroy(mmdeploy_video_recognizer_t recognizer);

/**
 * @brief Same as \ref mmdeploy_video_recognizer_create, but allows to control the execution
 * context of tasks via context
 */
MMDEPLOY_API int mmdeploy_video_recognizer_create_v2(mmdeploy_model_t model,
                                                     mmdeploy_context_t context,
                                                     mmdeploy_video_recognizer_t* recognizer);

/**
 * @brief Pack video recognizer inputs into mmdeploy_value_t
 * @param[in] images frames of the videos in the batch
 * @param[in] video_info sampling information of each video
 * @param[in] video_count number of videos in the batch
 * @param[out] value the created value
 * @return status code of the operation
 */
MMDEPLOY_API int mmdeploy_video_recognizer_create_input(
    const mmdeploy_mat_t* images, const mmdeploy_video_sample_info_t* video_info, int video_count,
    mmdeploy_value_t* value);

/**
 * @brief Apply video recognizer to packed inputs
 * @param[in] recognizer video recognizer's handle
 * @param[in] input packed input
 * @param[out] output inference output
 * @return status code of the operation
 */
MMDEPLOY_API int mmdeploy_video_recognizer_apply_v2(mmdeploy_video_recognizer_t recognizer,
                                                    mmdeploy_value_t input,
                                                    mmdeploy_value_t* output);

/**
 * @brief Unpack video recognizer output from a \ref mmdeploy_value_t
 * @param[in] output inference output
 * @param[out] results structured output
 * @param[out] result_count number of results for each video
 * @return status code of the operation
 */
MMDEPLOY_API int mmdeploy_video_recognizer_get_result(mmdeploy_value_t output,
                                                      mmdeploy_video_recognition_t** results,
                                                      int** result_count);

#ifdef __cplusplus
}
#endif

#endif  // MMDEPLOY_VIDEO_RECOGNIZER_H
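Taken together, the C API above follows the usual mmdeploy create -> apply -> release -> destroy lifecycle. Below is a minimal usage sketch, not part of this PR: the model directory name `tsn_sdk_model`, the 25-frame single-clip sampling, and the dummy BGR buffers standing in for real decoded frames are all assumptions, and the pixel-format/data-type enum spellings follow the dev-1.x C API.

#include <cstdint>
#include <cstdio>
#include <vector>

#include "mmdeploy/video_recognizer.h"

int main() {
  mmdeploy_video_recognizer_t recognizer{};
  if (mmdeploy_video_recognizer_create_by_path("tsn_sdk_model", "cpu", 0, &recognizer)) {
    return -1;
  }

  // one video, sampled as num_clips = 1 clip of clip_len = 25 frames
  const int clip_len = 25, num_clips = 1, h = 224, w = 224;
  std::vector<std::vector<uint8_t>> buffers(clip_len * num_clips,
                                            std::vector<uint8_t>(h * w * 3));  // stand-in frames
  std::vector<mmdeploy_mat_t> frames(clip_len * num_clips);
  for (size_t i = 0; i < frames.size(); ++i) {
    frames[i] = {buffers[i].data(), h, w, 3, MMDEPLOY_PIXEL_FORMAT_BGR, MMDEPLOY_DATA_TYPE_UINT8};
  }
  mmdeploy_video_sample_info_t info{clip_len, num_clips};

  mmdeploy_video_recognition_t* results{};
  int* result_count{};
  if (!mmdeploy_video_recognizer_apply(recognizer, frames.data(), &info, 1, &results,
                                       &result_count)) {
    for (int i = 0; i < result_count[0]; ++i) {
      std::printf("label: %d, score: %.4f\n", results[i].label_id, results[i].score);
    }
    mmdeploy_video_recognizer_release_result(results, result_count, 1);
  }
  mmdeploy_video_recognizer_destroy(recognizer);
  return 0;
}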
@@ -0,0 +1,91 @@
// Copyright (c) OpenMMLab. All rights reserved.

#ifndef MMDEPLOY_CSRC_MMDEPLOY_APIS_CXX_VIDEO_RECOGNIZER_HPP_
#define MMDEPLOY_CSRC_MMDEPLOY_APIS_CXX_VIDEO_RECOGNIZER_HPP_

#include "mmdeploy/common.hpp"
#include "mmdeploy/video_recognizer.h"

namespace mmdeploy {

namespace cxx {

using VideoRecognition = mmdeploy_video_recognition_t;
using VideoSampleInfo = mmdeploy_video_sample_info_t;

class VideoRecognizer : public NonMovable {
 public:
  VideoRecognizer(const Model& model, const Context& context) {
    auto ec = mmdeploy_video_recognizer_create_v2(model, context, &recognizer_);
    if (ec != MMDEPLOY_SUCCESS) {
      throw_exception(static_cast<ErrorCode>(ec));
    }
  }

  ~VideoRecognizer() {
    if (recognizer_) {
      mmdeploy_video_recognizer_destroy(recognizer_);
      recognizer_ = {};
    }
  }

  using Result = Result_<VideoRecognition>;

  std::vector<Result> Apply(Span<const std::vector<Mat>> videos,
                            Span<const VideoSampleInfo> infos) {
    if (videos.empty()) {
      return {};
    }

    int video_count = videos.size();

    VideoRecognition* results{};
    int* result_count{};
    std::vector<Mat> images;
    std::vector<VideoSampleInfo> video_info;
    for (int i = 0; i < videos.size(); i++) {
      for (auto& mat : videos[i]) {
        images.push_back(mat);
      }
      video_info.push_back(infos[i]);
    }

    auto ec =
        mmdeploy_video_recognizer_apply(recognizer_, reinterpret(images.data()), video_info.data(),
                                        video_count, &results, &result_count);
    if (ec != MMDEPLOY_SUCCESS) {
      throw_exception(static_cast<ErrorCode>(ec));
    }

    std::vector<Result> rets;
    rets.reserve(video_count);

    std::shared_ptr<VideoRecognition> data(results, [result_count, count = video_count](auto p) {
      mmdeploy_video_recognizer_release_result(p, result_count, count);
    });

    size_t offset = 0;
    for (size_t i = 0; i < video_count; ++i) {
      offset += rets.emplace_back(offset, result_count[i], data).size();
    }

    return rets;
  }

  Result Apply(const std::vector<Mat>& video, const VideoSampleInfo info) {
    return Apply(Span{video}, Span{info})[0];
  }

 private:
  mmdeploy_video_recognizer_t recognizer_{};
};

}  // namespace cxx

using cxx::VideoRecognition;
using cxx::VideoRecognizer;
using cxx::VideoSampleInfo;

}  // namespace mmdeploy

#endif  // MMDEPLOY_CSRC_MMDEPLOY_APIS_CXX_VIDEO_RECOGNIZER_HPP_
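For reference, calling the wrapper above looks roughly like this. A sketch under assumptions, not part of the PR: the model directory, the pre-extracted frame files, and the implicit cv::Mat to mmdeploy::Mat conversion (available when the C++ API is built with OpenCV support) are all assumed.

#include <cstdio>
#include <string>
#include <vector>

#include <opencv2/imgcodecs.hpp>

#include "mmdeploy/video_recognizer.hpp"

int main() {
  mmdeploy::Model model("tsn_sdk_model");  // hypothetical converted model directory
  mmdeploy::Context context(mmdeploy::Device("cpu"));
  mmdeploy::VideoRecognizer recognizer(model, context);

  // 25 pre-extracted frames of one video; one clip of 25 frames
  std::vector<mmdeploy::Mat> video;
  for (int i = 0; i < 25; ++i) {
    video.emplace_back(cv::imread("frames/" + std::to_string(i) + ".jpg"));
  }
  mmdeploy::VideoSampleInfo info{25, 1};  // {clip_len, num_clips}

  auto result = recognizer.Apply(video, info);
  for (const auto& label : result) {
    std::printf("label: %d, score: %.4f\n", label.label_id, label.score);
  }
  return 0;
}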
@@ -0,0 +1,88 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include "mmdeploy/video_recognizer.h"

#include "common.h"

namespace mmdeploy::python {

class PyVideoRecognizer {
 public:
  PyVideoRecognizer(const char* model_path, const char* device_name, int device_id) {
    auto status =
        mmdeploy_video_recognizer_create_by_path(model_path, device_name, device_id, &recognizer_);
    if (status != MMDEPLOY_SUCCESS) {
      throw std::runtime_error("failed to create video_recognizer");
    }
  }
  std::vector<std::vector<std::tuple<int, float>>> Apply(
      const std::vector<std::vector<PyImage>>& imgs, const std::vector<std::pair<int, int>>& info) {
    if (info.size() != imgs.size()) {
      throw std::invalid_argument("the length of info is not equal to that of imgs");
    }
    for (size_t i = 0; i < info.size(); i++) {
      if (imgs[i].size() != static_cast<size_t>(info[i].first) * info[i].second) {
        throw std::invalid_argument("invalid info");
      }
    }
    size_t total = 0;
    for (size_t i = 0; i < imgs.size(); i++) {
      total += imgs[i].size();
    }
    std::vector<mmdeploy_mat_t> clips;
    std::vector<mmdeploy_video_sample_info_t> clip_info;
    clips.reserve(total);
    clip_info.reserve(imgs.size());
    for (size_t i = 0; i < imgs.size(); i++) {
      for (const auto& img : imgs[i]) {
        auto mat = GetMat(img);
        clips.push_back(mat);
      }
      clip_info.push_back({info[i].first, info[i].second});
    }

    mmdeploy_video_recognition_t* results{};
    int* result_count{};
    // pass the real batch size so that `result_count` has one entry per video
    auto status = mmdeploy_video_recognizer_apply(recognizer_, clips.data(), clip_info.data(),
                                                  (int)imgs.size(), &results, &result_count);
    if (status != MMDEPLOY_SUCCESS) {
      throw std::runtime_error("failed to apply video_recognizer, code: " + std::to_string(status));
    }

    auto output = std::vector<std::vector<std::tuple<int, float>>>{};
    output.reserve(imgs.size());
    auto result_ptr = results;
    for (size_t i = 0; i < imgs.size(); ++i) {
      std::vector<std::tuple<int, float>> label_score;
      for (int j = 0; j < result_count[i]; ++j) {
        label_score.emplace_back(result_ptr[j].label_id, result_ptr[j].score);
      }
      output.push_back(std::move(label_score));
      result_ptr += result_count[i];
    }
    mmdeploy_video_recognizer_release_result(results, result_count, (int)imgs.size());
    return output;
  }

  ~PyVideoRecognizer() {
    mmdeploy_video_recognizer_destroy(recognizer_);
    recognizer_ = {};
  }

 private:
  mmdeploy_video_recognizer_t recognizer_{};
};

static PythonBindingRegisterer register_video_recognizer{[](py::module& m) {
  py::class_<PyVideoRecognizer>(m, "VideoRecognizer")
      .def(py::init([](const char* model_path, const char* device_name, int device_id) {
             return std::make_unique<PyVideoRecognizer>(model_path, device_name, device_id);
           }),
           py::arg("model_path"), py::arg("device_name"), py::arg("device_id") = 0)
      .def("__call__",
           [](PyVideoRecognizer* self, const std::vector<PyImage>& imgs,
              const std::pair<int, int>& info) { return self->Apply({imgs}, {info})[0]; })
      .def("batch", &PyVideoRecognizer::Apply);
}};

}  // namespace mmdeploy::python
@@ -11,6 +11,7 @@ if ("all" IN_LIST MMDEPLOY_CODEBASES)
     list(APPEND CODEBASES "mmedit")
     list(APPEND CODEBASES "mmpose")
     list(APPEND CODEBASES "mmrotate")
+    list(APPEND CODEBASES "mmaction")
 else ()
     set(CODEBASES ${MMDEPLOY_CODEBASES})
 endif ()
@@ -0,0 +1,15 @@
# Copyright (c) OpenMMLab. All rights reserved.

project(mmdeploy_mmaction)

# glob all .cpp sources in this directory
file(GLOB SRCS "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")
mmdeploy_add_module(${PROJECT_NAME} "${SRCS}")
add_subdirectory(cpu)
add_subdirectory(cuda)
target_link_libraries(${PROJECT_NAME} PRIVATE
        mmdeploy::transform
        mmdeploy_opencv_utils)

add_library(mmdeploy::mmaction ALIAS ${PROJECT_NAME})

set(MMDEPLOY_TASKS ${MMDEPLOY_TASKS} video_recognizer CACHE INTERNAL "")
@@ -0,0 +1,70 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include <algorithm>
#include <numeric>

#include "mmdeploy/codebase/mmaction/mmaction.h"
#include "mmdeploy/core/tensor.h"
#include "mmdeploy/core/utils/device_utils.h"

namespace mmdeploy::mmaction {

class BaseHead : public MMAction {
 public:
  explicit BaseHead(const Value& cfg) : MMAction(cfg) {
    if (cfg.contains("params")) {
      topk_ = cfg["params"].value("topk", 1);
      if (topk_ <= 0) {
        MMDEPLOY_ERROR("'topk' should be greater than 0, but got '{}'", topk_);
        throw_exception(eInvalidArgument);
      }
    }
  }

  Result<Value> operator()(const Value& infer_res) {
    MMDEPLOY_DEBUG("infer_res: {}", infer_res);
    auto output = infer_res["output"].get<Tensor>();

    if (!(output.shape().size() >= 2 && output.data_type() == DataType::kFLOAT)) {
      MMDEPLOY_ERROR("unsupported `output` tensor, shape: {}, dtype: {}", output.shape(),
                     (int)output.data_type());
      return Status(eNotSupported);
    }

    auto class_num = (int)output.shape(1);

    OUTCOME_TRY(auto _scores, MakeAvailableOnDevice(output, kHost, stream()));
    OUTCOME_TRY(stream().Wait());

    return GetLabels(_scores, class_num);
  }

 private:
  Value GetLabels(const Tensor& scores, int class_num) const {
    auto scores_data = scores.data<float>();
    // clamp so that a `topk` larger than the number of classes cannot push
    // partial_sort past the end of the index array
    auto topk = std::min(topk_, class_num);
    Labels output;
    output.reserve(topk);
    std::vector<int> idx(class_num);
    std::iota(begin(idx), end(idx), 0);
    std::partial_sort(begin(idx), begin(idx) + topk, end(idx),
                      [&](int i, int j) { return scores_data[i] > scores_data[j]; });
    for (int i = 0; i < topk; ++i) {
      auto label = Label{idx[i], scores_data[idx[i]]};
      MMDEPLOY_DEBUG("label_id: {}, score: {}", label.label_id, label.score);
      output.push_back(label);
    }
    return to_value(std::move(output));
  }

 private:
  static constexpr const auto kHost = Device{0};
  int topk_{1};
};

REGISTER_CODEBASE_COMPONENT(MMAction, BaseHead);
using SlowFastHead = BaseHead;
REGISTER_CODEBASE_COMPONENT(MMAction, SlowFastHead);
using TSNHead = BaseHead;
REGISTER_CODEBASE_COMPONENT(MMAction, TSNHead);

}  // namespace mmdeploy::mmaction
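The head's top-k selection sorts index positions rather than scores, so each surviving entry keeps its class id. A standalone illustration of the same idiom (toy scores, not mmdeploy types):

#include <algorithm>
#include <cstdio>
#include <numeric>
#include <vector>

int main() {
  std::vector<float> scores{0.05f, 0.70f, 0.10f, 0.15f};
  int topk = std::min(2, (int)scores.size());

  std::vector<int> idx(scores.size());
  std::iota(idx.begin(), idx.end(), 0);
  // only the first `topk` positions end up sorted; the rest stay unordered,
  // which is cheaper than a full sort when topk << class_num
  std::partial_sort(idx.begin(), idx.begin() + topk, idx.end(),
                    [&](int i, int j) { return scores[i] > scores[j]; });

  for (int i = 0; i < topk; ++i) {
    std::printf("label_id: %d, score: %.2f\n", idx[i], scores[idx[i]]);
  }
  // prints: label_id: 1, score: 0.70 / label_id: 3, score: 0.15
}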
@@ -0,0 +1,15 @@
# Copyright (c) OpenMMLab. All rights reserved.

project(mmdeploy_mmaction_cpu_impl CXX)

if ("cpu" IN_LIST MMDEPLOY_TARGET_DEVICES)
    add_library(${PROJECT_NAME} OBJECT format_shape_impl.cpp)
    set_target_properties(${PROJECT_NAME} PROPERTIES POSITION_INDEPENDENT_CODE 1)
    if (NOT (MMDEPLOY_SHARED_LIBS OR MSVC))
        target_compile_options(${PROJECT_NAME} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-fvisibility=hidden>)
    endif ()
    target_link_libraries(${PROJECT_NAME} PRIVATE
            mmdeploy::core)
    target_link_libraries(mmdeploy_mmaction PRIVATE ${PROJECT_NAME})
    mmdeploy_export(${PROJECT_NAME})
endif ()
@@ -0,0 +1,138 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include "mmdeploy/codebase/mmaction/format_shape.h"
#include "mmdeploy/core/utils/device_utils.h"

using namespace std;

namespace mmdeploy {
namespace cpu {

class FormatShapeImpl : public ::mmdeploy::FormatShapeImpl {
 public:
  explicit FormatShapeImpl(const Value& args) : ::mmdeploy::FormatShapeImpl(args) {}

 protected:
  Result<Tensor> Format(const std::vector<Tensor>& tensors, int clip_len, int num_clips) {
    int N = tensors.size();
    int H = tensors[0].shape(1);
    int W = tensors[0].shape(2);
    int C = tensors[0].shape(3);

    // gather all frame tensors on the host first
    std::vector<Tensor> host_tensors;
    host_tensors.reserve(N);
    for (int i = 0; i < N; i++) {
      OUTCOME_TRY(auto src_tensor, MakeAvailableOnDevice(tensors[i], kHost, stream_));
      host_tensors.push_back(std::move(src_tensor));
    }
    OUTCOME_TRY(stream_.Wait());

    // stack the frames into one contiguous [N, H, W, C] tensor
    TensorDesc desc = {kHost, DataType::kFLOAT, {N, H, W, C}};
    Tensor imgs(desc);
    int offset = 0;
    int n_item = H * W * C;
    int copy_size = n_item * sizeof(float);
    for (int i = 0; i < N; i++) {
      auto src_buffer = host_tensors[i].buffer();
      auto dst_buffer = imgs.buffer();
      OUTCOME_TRY(stream_.Copy(src_buffer, dst_buffer, copy_size, 0, offset));
      offset += copy_size;
    }
    OUTCOME_TRY(stream_.Wait());

    Tensor dst;
    if (arg_.input_format == "NCHW") {
      OUTCOME_TRY(dst, FormatNCHW(imgs, clip_len, num_clips));
    }
    if (arg_.input_format == "NCTHW") {
      OUTCOME_TRY(dst, FormatNCTHW(imgs, clip_len, num_clips));
    }
    // prepend the batch dimension
    TensorShape expand_dim = dst.shape();
    expand_dim.insert(expand_dim.begin(), 1);
    dst.Reshape(expand_dim);

    return dst;
  }

  Result<Tensor> FormatNCHW(Tensor& src, int clip_len, int num_clips) {
    int N = src.shape(0);
    int H = src.shape(1);
    int W = src.shape(2);
    int C = src.shape(3);
    return Transpose(src, {N, H, W, C}, {0, 3, 1, 2});
  }

  Result<Tensor> FormatNCTHW(Tensor& src, int clip_len, int num_clips) {
    int N = src.shape(0);
    int H = src.shape(1);
    int W = src.shape(2);
    int C = src.shape(3);
    int L = clip_len;
    if (N % L != 0) {
      return Status(eInvalidArgument);
    }
    int M = N / L;
    src.Reshape({M, L, H, W, C});

    return Transpose(src, {M, L, H, W, C}, {0, 4, 1, 2, 3});
  }

  Result<Tensor> Transpose(Tensor& src, const std::vector<int>& src_dims,
                           const std::vector<int>& permutation) {
    Tensor dst(src.desc());
    TensorShape shape(src.shape().size());
    for (int i = 0; i < shape.size(); i++) {
      shape[i] = src.shape(permutation[i]);
    }
    dst.Reshape(shape);
    int ndim = shape.size();
    std::vector<int> dst_strides(ndim);
    std::vector<int> src_strides(ndim);
    dst_strides[ndim - 1] = src_strides[ndim - 1] = 1;
    for (int i = ndim - 2; i >= 0; i--) {
      dst_strides[i] = dst_strides[i + 1] * shape[i + 1];
      src_strides[i] = src_strides[i + 1] * src_dims[i + 1];
    }
    // reorder the source strides by the permutation so that both stride
    // arrays are indexed by destination axes
    std::vector<int> tmp(ndim);
    for (int i = 0; i < ndim; i++) {
      tmp[i] = src_strides[permutation[i]];
    }
    src_strides.swap(tmp);
    // odometer walk over destination coordinates, advancing both pointers by
    // per-axis strides and rewinding an axis when it overflows
    std::vector<int> coord(ndim, 0);
    auto dst_data = dst.data<float>();
    auto src_data = src.data<float>();

    int i;
    do {
      dst_data[0] = src_data[0];
      for (i = ndim - 1; i >= 0; i--) {
        if (++coord[i] == shape[i]) {
          coord[i] = 0;
          dst_data -= (shape[i] - 1) * dst_strides[i];
          src_data -= (shape[i] - 1) * src_strides[i];
        } else {
          dst_data += dst_strides[i];
          src_data += src_strides[i];
          break;
        }
      }
    } while (i >= 0);
    return dst;
  }

  constexpr static Device kHost{0, 0};
};

class FormatShapeImplCreator : public Creator<::mmdeploy::FormatShapeImpl> {
 public:
  const char* GetName() const override { return "cpu"; }
  int GetVersion() const override { return 1; }
  ReturnType Create(const Value& args) override { return make_unique<FormatShapeImpl>(args); }
};

}  // namespace cpu
}  // namespace mmdeploy

using ::mmdeploy::FormatShapeImpl;
using ::mmdeploy::cpu::FormatShapeImplCreator;
REGISTER_MODULE(FormatShapeImpl, FormatShapeImplCreator);
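The odometer transpose above never computes a full multi-dimensional index per element; it carries coordinates like a mileage counter. A standalone run of the same loop on a 2x3 -> 3x2 float matrix (assumed toy shapes and strides, plain arrays instead of mmdeploy tensors):

#include <cstdio>
#include <vector>

int main() {
  std::vector<float> src_v{1, 2, 3, 4, 5, 6};  // 2x3, row-major
  std::vector<float> dst_v(6);
  std::vector<int> shape{3, 2};        // destination shape
  std::vector<int> dst_strides{2, 1};  // contiguous strides of 3x2
  std::vector<int> src_strides{1, 3};  // source strides reordered by permutation {1, 0}
  int ndim = 2;

  const float* src_data = src_v.data();
  float* dst_data = dst_v.data();
  std::vector<int> coord(ndim, 0);
  int i;
  do {
    dst_data[0] = src_data[0];
    for (i = ndim - 1; i >= 0; i--) {
      if (++coord[i] == shape[i]) {  // axis i overflowed: rewind both pointers
        coord[i] = 0;
        dst_data -= (shape[i] - 1) * dst_strides[i];
        src_data -= (shape[i] - 1) * src_strides[i];
      } else {  // advance along axis i and stop carrying
        dst_data += dst_strides[i];
        src_data += src_strides[i];
        break;
      }
    }
  } while (i >= 0);

  for (float v : dst_v) std::printf("%g ", v);  // prints: 1 4 2 5 3 6
  std::printf("\n");
}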
@@ -0,0 +1,18 @@
# Copyright (c) OpenMMLab. All rights reserved.
if (NOT "cuda" IN_LIST MMDEPLOY_TARGET_DEVICES)
    return()
endif ()

project(mmdeploy_mmaction_cuda_impl CXX)

add_library(${PROJECT_NAME} OBJECT format_shape_impl.cpp transpose.cu)
set_target_properties(${PROJECT_NAME} PROPERTIES POSITION_INDEPENDENT_CODE 1)
if (NOT (MMDEPLOY_SHARED_LIBS OR MSVC))
    target_compile_options(${PROJECT_NAME} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-fvisibility=hidden>)
endif ()
target_include_directories(${PROJECT_NAME} PRIVATE
        ${CUDA_INCLUDE_DIRS})
target_link_libraries(${PROJECT_NAME} PRIVATE
        mmdeploy::core)
target_link_libraries(mmdeploy_mmaction PRIVATE ${PROJECT_NAME})
mmdeploy_export(${PROJECT_NAME})
@@ -0,0 +1,129 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include "cuda_runtime.h"
#include "mmdeploy/codebase/mmaction/format_shape.h"
#include "mmdeploy/core/utils/device_utils.h"

using namespace std;

namespace mmdeploy {
namespace cuda {

template <typename T>
void Transpose(const T* src, const int* src_strides, T* dst, const int* dst_strides, int ndim,
               int total, cudaStream_t stream);

class FormatShapeImpl : public ::mmdeploy::FormatShapeImpl {
 public:
  explicit FormatShapeImpl(const Value& args) : ::mmdeploy::FormatShapeImpl(args) {}

 protected:
  Result<Tensor> Format(const std::vector<Tensor>& tensors, int clip_len, int num_clips) {
    int N = tensors.size();
    int H = tensors[0].shape(1);
    int W = tensors[0].shape(2);
    int C = tensors[0].shape(3);

    // stack the frames into one contiguous [N, H, W, C] tensor on the device
    TensorDesc desc = {device_, DataType::kFLOAT, {N, H, W, C}};
    Tensor imgs(desc);
    int offset = 0;
    int n_item = H * W * C;
    int copy_size = n_item * sizeof(float);
    for (int i = 0; i < N; i++) {
      auto src_buffer = tensors[i].buffer();
      auto dst_buffer = imgs.buffer();
      OUTCOME_TRY(stream_.Copy(src_buffer, dst_buffer, copy_size, 0, offset));
      offset += copy_size;
    }

    Tensor dst;
    if (arg_.input_format == "NCHW") {
      OUTCOME_TRY(dst, FormatNCHW(imgs, clip_len, num_clips));
    }
    if (arg_.input_format == "NCTHW") {
      OUTCOME_TRY(dst, FormatNCTHW(imgs, clip_len, num_clips));
    }
    TensorShape expand_dim = dst.shape();
    expand_dim.insert(expand_dim.begin(), 1);
    dst.Reshape(expand_dim);

    return dst;
  }

  Result<Tensor> FormatNCHW(Tensor& src, int clip_len, int num_clips) {
    int N = src.shape(0);
    int H = src.shape(1);
    int W = src.shape(2);
    int C = src.shape(3);
    return Transpose(src, {N, H, W, C}, {0, 3, 1, 2});
  }

  Result<Tensor> FormatNCTHW(Tensor& src, int clip_len, int num_clips) {
    int N = src.shape(0);
    int H = src.shape(1);
    int W = src.shape(2);
    int C = src.shape(3);
    int L = clip_len;
    if (N % L != 0) {
      return Status(eInvalidArgument);
    }
    int M = N / L;
    src.Reshape({M, L, H, W, C});

    return Transpose(src, {M, L, H, W, C}, {0, 4, 1, 2, 3});
  }

  Result<Tensor> Transpose(Tensor& src, const std::vector<int>& src_dims,
                           const std::vector<int>& permutation) {
    Tensor dst(src.desc());
    TensorShape shape(src.shape().size());
    for (int i = 0; i < shape.size(); i++) {
      shape[i] = src.shape(permutation[i]);
    }
    dst.Reshape(shape);

    int ndim = src_dims.size();
    std::vector<int> dst_dims(ndim);
    for (int i = 0; i < ndim; i++) {
      dst_dims[i] = src_dims[permutation[i]];
    }

    std::vector<int> src_strides(ndim);
    std::vector<int> dst_strides(ndim);
    std::vector<int> buffer(ndim);
    buffer.back() = 1;
    dst_strides.back() = 1;
    for (int i = ndim - 1; i > 0; i--) {
      buffer[i - 1] = buffer[i] * src_dims[i];
      dst_strides[i - 1] = dst_strides[i] * dst_dims[i];
    }
    // source strides reordered by the permutation, indexed by destination axes
    for (int i = 0; i < ndim; ++i) {
      src_strides[i] = buffer[permutation[i]];
    }

    Buffer _src_strides(Device("cuda"), sizeof(int) * ndim);
    Buffer _dst_strides(Device("cuda"), sizeof(int) * ndim);
    OUTCOME_TRY(stream_.Copy(src_strides.data(), _src_strides));
    OUTCOME_TRY(stream_.Copy(dst_strides.data(), _dst_strides));

    ::mmdeploy::cuda::Transpose(src.data<float>(), GetNative<int*>(_src_strides), dst.data<float>(),
                                GetNative<int*>(_dst_strides), ndim, src.size(),
                                (cudaStream_t)stream_.GetNative());
    // wait here so that the local stride buffers outlive the asynchronous kernel
    OUTCOME_TRY(stream_.Wait());
    return dst;
  }
};

class FormatShapeImplCreator : public Creator<::mmdeploy::FormatShapeImpl> {
 public:
  const char* GetName() const override { return "cuda"; }
  int GetVersion() const override { return 1; }
  ReturnType Create(const Value& args) override { return make_unique<FormatShapeImpl>(args); }
};

}  // namespace cuda
}  // namespace mmdeploy

using ::mmdeploy::FormatShapeImpl;
using ::mmdeploy::cuda::FormatShapeImplCreator;
REGISTER_MODULE(FormatShapeImpl, FormatShapeImplCreator);
@@ -0,0 +1,38 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include <stdint.h>
#include <stdio.h>

namespace mmdeploy {
namespace cuda {

template <typename T>
__global__ void transpose(const T* src, const int* src_strides, T* dst, const int* dst_strides,
                          int ndim, int total) {
  int u = blockIdx.x * blockDim.x + threadIdx.x;
  if (u >= total) return;

  int remaining = u;
  int v = 0;
  for (int i = 0; i < ndim; i++) {
    int p = remaining / dst_strides[i];
    remaining -= p * dst_strides[i];
    v += p * src_strides[i];
  }
  dst[u] = src[v];
}

template <typename T>
void Transpose(const T* src, const int* src_strides, T* dst, const int* dst_strides, int ndim,
               int total, cudaStream_t stream) {
  int thread_num = 256;
  int block_num = (total + thread_num - 1) / thread_num;
  transpose<T>
      <<<block_num, thread_num, 0, stream>>>(src, src_strides, dst, dst_strides, ndim, total);
}

template void Transpose<float>(const float* src, const int* src_strides, float* dst,
                               const int* dst_strides, int ndim, int total, cudaStream_t stream);

}  // namespace cuda
}  // namespace mmdeploy
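Each CUDA thread above decodes its destination offset u into coordinates via the destination strides, then re-encodes them through the (already permuted) source strides. A host-side reference of the same arithmetic, with stride values assumed from the NHWC {1,2,2,3} -> NCHW case as the host code computes them:

#include <cstdio>
#include <vector>

int main() {
  // NHWC {1, 2, 2, 3} -> NCHW, i.e. permutation {0, 3, 1, 2}
  std::vector<int> dst_strides{12, 4, 2, 1};  // strides of the {1,3,2,2} output
  std::vector<int> src_strides{12, 1, 6, 3};  // input strides reordered by the permutation
  int total = 12, ndim = 4;

  for (int u = 0; u < total; ++u) {
    int remaining = u, v = 0;
    for (int i = 0; i < ndim; ++i) {
      int p = remaining / dst_strides[i];
      remaining -= p * dst_strides[i];
      v += p * src_strides[i];
    }
    std::printf("dst[%2d] = src[%2d]\n", u, v);  // dst[u] = src[v], as in the kernel
  }
}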
@@ -0,0 +1,89 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include "mmdeploy/codebase/mmaction/format_shape.h"

#include "mmdeploy/archive/json_archive.h"
#include "mmdeploy/core/utils/device_utils.h"

using namespace std;

namespace mmdeploy {

FormatShapeImpl::FormatShapeImpl(const Value& args) : TransformImpl(args) {
  arg_.input_format = args.value("input_format", std::string(""));
  if (arg_.input_format != "NCHW" && arg_.input_format != "NCTHW") {
    throw std::domain_error("'input_format' should be 'NCHW' or 'NCTHW'");
  }
}

Result<Value> FormatShapeImpl::Process(const Value& input) {
  MMDEPLOY_DEBUG("input: {}", to_json(input).dump(2));

  if (!input.is_array()) {
    MMDEPLOY_ERROR("input of format shape should be an array");
    return Status(eInvalidArgument);
  }
  if (!(input[0].contains("imgs") || input[0].contains("img"))) {
    MMDEPLOY_ERROR("input should contain 'imgs' or 'img'");
    return Status(eInvalidArgument);
  }

  int n_image = input.size();
  int clip_len = input[0]["clip_len"].get<int>();
  int num_clips = input[0]["num_clips"].get<int>();
  std::vector<Tensor> images;

  if (input[0].contains("imgs")) {
    // multi-crop path: group the i-th crop of every frame together
    int n_crop = input[0]["imgs"].size();
    int total = n_image * n_crop;
    images.reserve(total);
    for (int i = 0; i < n_crop; i++) {
      for (int j = 0; j < n_image; j++) {
        images.push_back(input[j]["imgs"][i].get<Tensor>());
      }
    }
  } else if (input[0].contains("img")) {
    images.reserve(n_image);
    for (int i = 0; i < n_image; i++) {
      images.push_back(input[i]["img"].get<Tensor>());
    }
  }

  Value output;
  OUTCOME_TRY(auto img, Format(images, clip_len, num_clips));
  SetTransformData(output, "img", std::move(img));
  return output;
}

class FormatShape : public Transform {
 public:
  explicit FormatShape(const Value& args, int version = 0) : Transform(args) {
    auto impl_creator = Registry<FormatShapeImpl>::Get().GetCreator(specified_platform_, version);
    if (nullptr == impl_creator) {
      MMDEPLOY_ERROR("'FormatShape' is not supported on '{}' platform", specified_platform_);
      throw std::domain_error("'FormatShape' is not supported on specified platform");
    }
    impl_ = impl_creator->Create(args);
  }
  ~FormatShape() override = default;

  Result<Value> Process(const Value& input) override { return impl_->Process(input); }

 protected:
  std::unique_ptr<FormatShapeImpl> impl_;
};

class FormatShapeCreator : public Creator<Transform> {
 public:
  const char* GetName(void) const override { return "FormatShape"; }
  int GetVersion(void) const override { return version_; }
  ReturnType Create(const Value& args) override { return make_unique<FormatShape>(args, version_); }

 private:
  int version_{1};
};

REGISTER_MODULE(Transform, FormatShapeCreator);
MMDEPLOY_DEFINE_REGISTRY(FormatShapeImpl);

}  // namespace mmdeploy
@@ -0,0 +1,37 @@
// Copyright (c) OpenMMLab. All rights reserved.

#ifndef MMDEPLOY_SRC_CODEBASE_MMACTION_FORMAT_SHAPE_H_
#define MMDEPLOY_SRC_CODEBASE_MMACTION_FORMAT_SHAPE_H_

#include <array>
#include <vector>

#include "mmdeploy/core/tensor.h"
#include "mmdeploy/preprocess/transform/transform.h"

namespace mmdeploy {

class FormatShapeImpl : public TransformImpl {
 public:
  explicit FormatShapeImpl(const Value& args);
  ~FormatShapeImpl() override = default;

  Result<Value> Process(const Value& input) override;

 protected:
  virtual Result<Tensor> Format(const std::vector<Tensor>& tensors, int clip_len,
                                int num_clips) = 0;

 protected:
  struct format_shape_arg_t {
    std::string input_format;
  };
  using ArgType = struct format_shape_arg_t;
  ArgType arg_;
};

MMDEPLOY_DECLARE_REGISTRY(FormatShapeImpl);

}  // namespace mmdeploy

#endif  // MMDEPLOY_SRC_CODEBASE_MMACTION_FORMAT_SHAPE_H_
@@ -0,0 +1,13 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include "mmdeploy/codebase/mmaction/mmaction.h"

namespace mmdeploy {
namespace mmaction {

REGISTER_CODEBASE(MMAction);

}  // namespace mmaction

MMDEPLOY_DEFINE_REGISTRY(mmaction::MMAction);
}  // namespace mmdeploy
@@ -0,0 +1,28 @@
// Copyright (c) OpenMMLab. All rights reserved.

#ifndef MMDEPLOY_SRC_CODEBASE_MMACTION_MMACTION_H_
#define MMDEPLOY_SRC_CODEBASE_MMACTION_MMACTION_H_

#include <vector>

#include "mmdeploy/codebase/common.h"
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/module.h"
#include "mmdeploy/core/serialization.h"

namespace mmdeploy {
namespace mmaction {

struct Label {
  int label_id;
  float score;
  MMDEPLOY_ARCHIVE_MEMBERS(label_id, score);
};

using Labels = std::vector<Label>;

DECLARE_CODEBASE(MMAction, mmaction);
}  // namespace mmaction

MMDEPLOY_DECLARE_REGISTRY(mmaction::MMAction);
}  // namespace mmdeploy

#endif  // MMDEPLOY_SRC_CODEBASE_MMACTION_MMACTION_H_
@@ -5,6 +5,9 @@ project(mmdeploy_cpu_transform_impl)
 set(SRCS
     collect_impl.cpp
     crop_impl.cpp
+    ten_crop_impl.cpp
+    three_crop_impl.cpp
+    crop_utils.cpp
     image2tensor_impl.cpp
     default_format_bundle_impl.cpp
     load_impl.cpp
@@ -0,0 +1,24 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/preprocess/transform/crop.h"
#include "mmdeploy/utils/opencv/opencv_utils.h"

using namespace std;

namespace mmdeploy {
namespace cpu {

Result<Tensor> CropImage(Stream& stream, const Device& device, const Tensor& tensor, int top,
                         int left, int bottom, int right) {
  OUTCOME_TRY(auto src_tensor, MakeAvailableOnDevice(tensor, device, stream));

  // name the guard so that it lives until the end of the scope
  SyncOnScopeExit sync(stream, src_tensor.buffer() != tensor.buffer(), src_tensor);

  cv::Mat mat = Tensor2CVMat(src_tensor);
  cv::Mat cropped_mat = Crop(mat, top, left, bottom, right);
  return CVMat2Tensor(cropped_mat);
}

}  // namespace cpu
}  // namespace mmdeploy
@@ -0,0 +1,47 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/preprocess/transform/ten_crop.h"
#include "opencv_utils.h"

using namespace std;

namespace mmdeploy {
namespace cpu {

Result<Tensor> CropImage(Stream& stream, const Device& device, const Tensor& tensor, int top,
                         int left, int bottom, int right);

class TenCropImpl : public ::mmdeploy::TenCropImpl {
 public:
  explicit TenCropImpl(const Value& args) : ::mmdeploy::TenCropImpl(args) {}

 protected:
  Result<Tensor> CropImage(const Tensor& tensor, int top, int left, int bottom,
                           int right) override {
    return ::mmdeploy::cpu::CropImage(stream_, device_, tensor, top, left, bottom, right);
  }

  Result<Tensor> HorizontalFlip(const Tensor& tensor) override {
    OUTCOME_TRY(auto src_tensor, MakeAvailableOnDevice(tensor, device_, stream_));
    SyncOnScopeExit sync(stream_, src_tensor.buffer() != tensor.buffer(), src_tensor);
    cv::Mat mat = Tensor2CVMat(src_tensor);
    cv::Mat flipped_mat;
    cv::flip(mat, flipped_mat, 1);  // flip around the y-axis
    return CVMat2Tensor(flipped_mat);
  }
};

class TenCropImplCreator : public Creator<::mmdeploy::TenCropImpl> {
 public:
  const char* GetName() const override { return "cpu"; }
  int GetVersion() const override { return 1; }
  ReturnType Create(const Value& args) override { return make_unique<TenCropImpl>(args); }
};

}  // namespace cpu
}  // namespace mmdeploy

using ::mmdeploy::TenCropImpl;
using ::mmdeploy::cpu::TenCropImplCreator;
REGISTER_MODULE(TenCropImpl, TenCropImplCreator);
@@ -0,0 +1,38 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/preprocess/transform/three_crop.h"
#include "opencv_utils.h"

using namespace std;

namespace mmdeploy {
namespace cpu {

Result<Tensor> CropImage(Stream& stream, const Device& device, const Tensor& tensor, int top,
                         int left, int bottom, int right);

class ThreeCropImpl : public ::mmdeploy::ThreeCropImpl {
 public:
  explicit ThreeCropImpl(const Value& args) : ::mmdeploy::ThreeCropImpl(args) {}

 protected:
  Result<Tensor> CropImage(const Tensor& tensor, int top, int left, int bottom,
                           int right) override {
    return ::mmdeploy::cpu::CropImage(stream_, device_, tensor, top, left, bottom, right);
  }
};

class ThreeCropImplCreator : public Creator<::mmdeploy::ThreeCropImpl> {
 public:
  const char* GetName() const override { return "cpu"; }
  int GetVersion() const override { return 1; }
  ReturnType Create(const Value& args) override { return make_unique<ThreeCropImpl>(args); }
};

}  // namespace cpu
}  // namespace mmdeploy

using ::mmdeploy::ThreeCropImpl;
using ::mmdeploy::cpu::ThreeCropImplCreator;
REGISTER_MODULE(ThreeCropImpl, ThreeCropImplCreator);
@@ -7,6 +7,9 @@ find_package(pplcv REQUIRED)
 set(SRCS
     collect_impl.cpp
     crop_impl.cpp
+    three_crop_impl.cpp
+    ten_crop_impl.cpp
+    crop_utils.cpp
     image2tensor_impl.cpp
     default_format_bundle_impl.cpp
     load_impl.cpp
@@ -0,0 +1,66 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include <cuda_runtime.h>

#include <cassert>

#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/preprocess/transform/crop.h"

using namespace std;

namespace mmdeploy {
namespace cuda {

template <typename T, int channels>
void Crop(const T* src, int src_w, T* dst, int dst_h, int dst_w, int offset_h, int offset_w,
          cudaStream_t stream);

Result<Tensor> CropImage(Stream& _stream, const Device& device, const Tensor& tensor, int top,
                         int left, int bottom, int right) {
  OUTCOME_TRY(auto device_tensor, MakeAvailableOnDevice(tensor, device, _stream));

  SyncOnScopeExit sync(_stream, device_tensor.buffer() != tensor.buffer(), device_tensor);

  auto stream = GetNative<cudaStream_t>(_stream);
  auto desc = device_tensor.desc();

  int h = bottom - top + 1;
  int w = right - left + 1;
  int c = desc.shape[3];
  auto type = desc.data_type;

  TensorShape shape{1, h, w, c};
  TensorDesc dst_desc{device, desc.data_type, shape, desc.name};
  Tensor dst_tensor{dst_desc};
  assert(device.is_device());
  if (DataType::kINT8 == type) {
    auto input = device_tensor.data<uint8_t>();
    auto output = dst_tensor.data<uint8_t>();
    if (3 == c) {
      Crop<uint8_t, 3>(input, desc.shape[2], output, h, w, top, left, stream);
    } else if (1 == c) {
      Crop<uint8_t, 1>(input, desc.shape[2], output, h, w, top, left, stream);
    } else {
      MMDEPLOY_ERROR("unsupported channels {}", c);
      return Status(eNotSupported);
    }
  } else if (DataType::kFLOAT == type) {
    auto input = device_tensor.data<float>();
    auto output = dst_tensor.data<float>();
    if (3 == c) {
      Crop<float, 3>(input, desc.shape[2], output, h, w, top, left, stream);
    } else if (1 == c) {
      Crop<float, 1>(input, desc.shape[2], output, h, w, top, left, stream);
    } else {
      MMDEPLOY_ERROR("unsupported channels {}", c);
      return Status(eNotSupported);
    }
  } else {
    MMDEPLOY_ERROR("unsupported data type {}", (int)type);
    return Status(eNotSupported);
  }
  return dst_tensor;
}

}  // namespace cuda

}  // namespace mmdeploy
@@ -0,0 +1,89 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include <cuda_runtime.h>

#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/preprocess/transform/ten_crop.h"
#include "ppl/cv/cuda/flip.h"

using namespace std;

namespace mmdeploy {
namespace cuda {

Result<Tensor> CropImage(Stream& stream, const Device& device, const Tensor& tensor, int top,
                         int left, int bottom, int right);

class TenCropImpl : public ::mmdeploy::TenCropImpl {
 public:
  explicit TenCropImpl(const Value& args) : ::mmdeploy::TenCropImpl(args) {}

 protected:
  Result<Tensor> CropImage(const Tensor& tensor, int top, int left, int bottom,
                           int right) override {
    return ::mmdeploy::cuda::CropImage(stream_, device_, tensor, top, left, bottom, right);
  }

  Result<Tensor> HorizontalFlip(const Tensor& tensor) override {
    OUTCOME_TRY(auto src_tensor, MakeAvailableOnDevice(tensor, device_, stream_));

    SyncOnScopeExit sync(stream_, src_tensor.buffer() != tensor.buffer(), src_tensor);

    TensorDesc dst_desc = tensor.desc();
    dst_desc.device = device_;
    Tensor dst_tensor(dst_desc);
    auto stream = GetNative<cudaStream_t>(stream_);
    int h = (int)tensor.shape(1);
    int w = (int)tensor.shape(2);
    int c = (int)tensor.shape(3);
    ppl::common::RetCode ret;
    if (tensor.data_type() == DataType::kINT8) {
      // read from the on-device copy, not the possibly host-resident input
      auto input = src_tensor.data<uint8_t>();
      auto output = dst_tensor.data<uint8_t>();
      if (c == 1) {
        ret = ppl::cv::cuda::Flip<uint8_t, 1>(stream, h, w, w * c, input, w * c, output, 1);
      } else if (c == 3) {
        ret = ppl::cv::cuda::Flip<uint8_t, 3>(stream, h, w, w * c, input, w * c, output, 1);
      } else {
        ret = ppl::common::RC_UNSUPPORTED;
      }
    } else if (tensor.data_type() == DataType::kFLOAT) {
      auto input = src_tensor.data<float>();
      auto output = dst_tensor.data<float>();
      if (c == 1) {
        ret = ppl::cv::cuda::Flip<float, 1>(stream, h, w, w * c, input, w * c, output, 1);
      } else if (c == 3) {
        ret = ppl::cv::cuda::Flip<float, 3>(stream, h, w, w * c, input, w * c, output, 1);
      } else {
        ret = ppl::common::RC_UNSUPPORTED;
      }
    } else {
      MMDEPLOY_ERROR("unsupported data type {}", tensor.data_type());
      return Status(eNotSupported);
    }

    if (ret != 0) {
      return Status(eFail);
    }

    return dst_tensor;
  }
};

class TenCropImplCreator : public Creator<::mmdeploy::TenCropImpl> {
 public:
  const char* GetName() const override { return "cuda"; }
  int GetVersion() const override { return 1; }
  ReturnType Create(const Value& args) override { return make_unique<TenCropImpl>(args); }
};

}  // namespace cuda
}  // namespace mmdeploy

using ::mmdeploy::TenCropImpl;
using ::mmdeploy::cuda::TenCropImplCreator;
REGISTER_MODULE(TenCropImpl, TenCropImplCreator);
@@ -0,0 +1,42 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include <cuda_runtime.h>

#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/preprocess/transform/three_crop.h"

using namespace std;

namespace mmdeploy {
namespace cuda {

Result<Tensor> CropImage(Stream& stream, const Device& device, const Tensor& tensor, int top,
                         int left, int bottom, int right);

class ThreeCropImpl : public ::mmdeploy::ThreeCropImpl {
 public:
  explicit ThreeCropImpl(const Value& args) : ::mmdeploy::ThreeCropImpl(args) {}

 protected:
  Result<Tensor> CropImage(const Tensor& tensor, int top, int left, int bottom,
                           int right) override {
    return ::mmdeploy::cuda::CropImage(stream_, device_, tensor, top, left, bottom, right);
  }
};

class ThreeCropImplCreator : public Creator<::mmdeploy::ThreeCropImpl> {
 public:
  const char* GetName() const override { return "cuda"; }
  int GetVersion() const override { return 1; }
  ReturnType Create(const Value& args) override { return make_unique<ThreeCropImpl>(args); }
};

}  // namespace cuda
}  // namespace mmdeploy

using ::mmdeploy::ThreeCropImpl;
using ::mmdeploy::cuda::ThreeCropImplCreator;
REGISTER_MODULE(ThreeCropImpl, ThreeCropImplCreator);
@@ -6,6 +6,8 @@ set(SRCS
     collect.cpp
     compose.cpp
     crop.cpp
+    three_crop.cpp
+    ten_crop.cpp
     image2tensor.cpp
     default_format_bundle.cpp
     load.cpp
@@ -13,7 +15,8 @@ set(SRCS
     pad.cpp
     resize.cpp
     transform.cpp
-    tracer.cpp)
+    tracer.cpp
+    lift.cpp)
 mmdeploy_add_module(${PROJECT_NAME} LIBRARY "${SRCS}")
 target_include_directories(
     ${PROJECT_NAME} PUBLIC $<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}/preprocess>)
@@ -8,6 +8,21 @@

 namespace mmdeploy {

+void SaveIntermediates(Value& value, Value::Array& intermediates) {
+  if (value.is_array()) {
+    for (auto& inner : value) {
+      if (auto it = inner.find("__data__"); it != inner.end()) {
+        std::move(it->begin(), it->end(), std::back_inserter(intermediates));
+        it->array().clear();
+      }
+    }
+  } else if (value.is_object()) {
+    if (auto it = value.find("__data__"); it != value.end()) {
+      std::move(it->begin(), it->end(), std::back_inserter(intermediates));
+      it->array().clear();
+    }
+  }
+}
 Compose::Compose(const Value& args, int version) : Transform(args) {
   assert(args.contains("context"));
@@ -44,10 +59,7 @@ Result<Value> Compose::Process(const Value& input) {
   Value::Array intermediates;
   for (auto& transform : transforms_) {
     OUTCOME_TRY(auto t, transform->Process(output));
-    if (auto it = t.find("__data__"); it != t.end()) {
-      std::move(it->begin(), it->end(), std::back_inserter(intermediates));
-      it->array().clear();
-    }
+    SaveIntermediates(t, intermediates);
     output = std::move(t);
   }
   OUTCOME_TRY(stream_.Wait());
@@ -0,0 +1,42 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include "mmdeploy/preprocess/transform/lift.h"

#include "mmdeploy/archive/json_archive.h"
#include "mmdeploy/archive/value_archive.h"
#include "mmdeploy/core/utils/formatter.h"

namespace mmdeploy {

Lift::Lift(const Value& args, int version) : Transform(args) {
  std::string type = "Compose";
  auto creator = Registry<Transform>::Get().GetCreator(type, version);
  if (!creator) {
    MMDEPLOY_ERROR("Unable to find Transform creator: {}. Available transforms: {}", type,
                   Registry<Transform>::Get().List());
    throw_exception(eEntryNotFound);
  }
  compose_ = creator->Create(args);
}

Result<Value> Lift::Process(const Value& input) {
  Value output;
  // apply the inner Compose pipeline to every element of the input array
  for (int i = 0; i < input.size(); i++) {
    Value single = input[i];
    OUTCOME_TRY(auto t, compose_->Process(single));
    output.push_back(std::move(t));
  }
  return std::move(output);
}

class LiftCreator : public Creator<Transform> {
 public:
  const char* GetName() const override { return "Lift"; }
  int GetVersion() const override { return version_; }
  ReturnType Create(const Value& args) override { return std::make_unique<Lift>(args, version_); }

 private:
  int version_{1};
};

REGISTER_MODULE(Transform, LiftCreator);
}  // namespace mmdeploy
@@ -0,0 +1,23 @@
// Copyright (c) OpenMMLab. All rights reserved.

#ifndef MMDEPLOY_SRC_PREPROCESS_TRANSFORM_LIFT_H_
#define MMDEPLOY_SRC_PREPROCESS_TRANSFORM_LIFT_H_

#include "mmdeploy/preprocess/transform/transform.h"

namespace mmdeploy {

class MMDEPLOY_API Lift : public Transform {
 public:
  explicit Lift(const Value& args, int version = 0);
  ~Lift() override = default;

  Result<Value> Process(const Value& input) override;

 private:
  std::unique_ptr<Transform> compose_;
};

}  // namespace mmdeploy

#endif  // MMDEPLOY_SRC_PREPROCESS_TRANSFORM_LIFT_H_
@@ -0,0 +1,90 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include "mmdeploy/preprocess/transform/ten_crop.h"

#include "mmdeploy/archive/json_archive.h"

using namespace std;

namespace mmdeploy {

TenCropImpl::TenCropImpl(const Value& args) : TransformImpl(args) {
  // (w, h) of crop size
  if (!args.contains("crop_size")) {
    throw std::invalid_argument("'crop_size' is expected");
  }
  if (args["crop_size"].is_number_integer()) {
    int crop_size = args["crop_size"].get<int>();
    arg_.crop_size[0] = arg_.crop_size[1] = crop_size;
  } else if (args["crop_size"].is_array() && args["crop_size"].size() == 2) {
    arg_.crop_size[0] = args["crop_size"][0].get<int>();
    arg_.crop_size[1] = args["crop_size"][1].get<int>();
  } else {
    throw std::invalid_argument("'crop_size' should be an integer or an int array of size 2");
  }
}

Result<Value> TenCropImpl::Process(const Value& input) {
  MMDEPLOY_DEBUG("input: {}", to_json(input).dump(2));

  // copy input data, and update its properties
  Value output = input;
  auto tensor = input["img"].get<Tensor>();
  int img_h = tensor.shape(1);
  int img_w = tensor.shape(2);
  int crop_w = arg_.crop_size[0];
  int crop_h = arg_.crop_size[1];

  // four corners plus the center, on a quarter-step grid
  int w_step = (img_w - crop_w) / 4;
  int h_step = (img_h - crop_h) / 4;
  std::array<std::pair<int, int>, 5> offsets = {{{0, 0},
                                                 {4 * w_step, 0},
                                                 {0, 4 * h_step},
                                                 {4 * w_step, 4 * h_step},
                                                 {2 * w_step, 2 * h_step}}};
  vector<Tensor> cropped;
  cropped.reserve(10);
  for (const auto& [offx, offy] : offsets) {
    int y1 = offy;
    int y2 = offy + crop_h - 1;
    int x1 = offx;
    int x2 = offx + crop_w - 1;
    OUTCOME_TRY(auto cropped_tensor, CropImage(tensor, y1, x1, y2, x2));
    OUTCOME_TRY(auto flipped_tensor, HorizontalFlip(cropped_tensor));
    cropped.push_back(std::move(cropped_tensor));
    cropped.push_back(std::move(flipped_tensor));
  }

  output["imgs"] = Value{};
  for (size_t i = 0; i < cropped.size(); i++) {
    output["imgs"].push_back(cropped[i]);
    output["__data__"].push_back(std::move(cropped[i]));
  }

  return output;
}

TenCrop::TenCrop(const Value& args, int version) : Transform(args) {
  auto impl_creator = Registry<TenCropImpl>::Get().GetCreator(specified_platform_, version);
  if (nullptr == impl_creator) {
    MMDEPLOY_ERROR("'TenCrop' is not supported on '{}' platform", specified_platform_);
    throw std::domain_error("'TenCrop' is not supported on specified platform");
  }
  impl_ = impl_creator->Create(args);
}

class TenCropCreator : public Creator<Transform> {
 public:
  const char* GetName(void) const override { return "TenCrop"; }
  int GetVersion(void) const override { return version_; }
  ReturnType Create(const Value& args) override {
    return std::make_unique<TenCrop>(args, version_);
  }

 private:
  int version_{1};
};

REGISTER_MODULE(Transform, TenCropCreator);
MMDEPLOY_DEFINE_REGISTRY(TenCropImpl);
}  // namespace mmdeploy
@@ -0,0 +1,49 @@
// Copyright (c) OpenMMLab. All rights reserved.

#ifndef MMDEPLOY_TEN_CROP_H
#define MMDEPLOY_TEN_CROP_H

#include <array>

#include "mmdeploy/core/tensor.h"
#include "transform.h"

namespace mmdeploy {

class MMDEPLOY_API TenCropImpl : public TransformImpl {
 public:
  explicit TenCropImpl(const Value& args);
  ~TenCropImpl() override = default;

  Result<Value> Process(const Value& input) override;

 protected:
  virtual Result<Tensor> CropImage(const Tensor& tensor, int top, int left, int bottom,
                                   int right) = 0;
  virtual Result<Tensor> HorizontalFlip(const Tensor& tensor) = 0;

 protected:
  struct ten_crop_arg_t {
    std::array<int, 2> crop_size;
  };
  using ArgType = struct ten_crop_arg_t;

 protected:
  ArgType arg_;
};

class MMDEPLOY_API TenCrop : public Transform {
 public:
  explicit TenCrop(const Value& args, int version = 0);
  ~TenCrop() override = default;

  Result<Value> Process(const Value& input) override { return impl_->Process(input); }

 protected:
  std::unique_ptr<TenCropImpl> impl_;
};

MMDEPLOY_DECLARE_REGISTRY(TenCropImpl);
}  // namespace mmdeploy

#endif  // MMDEPLOY_TEN_CROP_H
@@ -0,0 +1,101 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include "mmdeploy/preprocess/transform/three_crop.h"

#include "mmdeploy/archive/json_archive.h"

using namespace std;

namespace mmdeploy {

Result<void> check_input_shape(int img_h, int img_w, int crop_h, int crop_w) {
  // ThreeCrop requires the image to match the crop size along at least one dimension
  if (img_h == crop_h || img_w == crop_w) {
    return success();
  }
  MMDEPLOY_ERROR("ThreeCrop error, img_h: {} != crop_h: {} && img_w: {} != crop_w: {}", img_h,
                 crop_h, img_w, crop_w);
  return Status(eInvalidArgument);
}

ThreeCropImpl::ThreeCropImpl(const Value& args) : TransformImpl(args) {
  // (w, h) of crop size
  if (!args.contains("crop_size")) {
    throw std::invalid_argument("'crop_size' is expected");
  }
  if (args["crop_size"].is_number_integer()) {
    int crop_size = args["crop_size"].get<int>();
    arg_.crop_size[0] = arg_.crop_size[1] = crop_size;
  } else if (args["crop_size"].is_array() && args["crop_size"].size() == 2) {
    arg_.crop_size[0] = args["crop_size"][0].get<int>();
    arg_.crop_size[1] = args["crop_size"][1].get<int>();
  } else {
    throw std::invalid_argument("'crop_size' should be an integer or an int array of size 2");
  }
}

Result<Value> ThreeCropImpl::Process(const Value& input) {
  MMDEPLOY_DEBUG("input: {}", to_json(input).dump(2));

  // copy input data, and update its properties
  Value output = input;
  auto tensor = input["img"].get<Tensor>();
  auto desc = tensor.desc();
  int img_h = desc.shape[1];
  int img_w = desc.shape[2];
  int crop_w = arg_.crop_size[0];
  int crop_h = arg_.crop_size[1];
  OUTCOME_TRY(check_input_shape(img_h, img_w, crop_h, crop_w));

  // two edge crops plus a center crop, along the dimension that exceeds the crop size
  std::array<std::pair<int, int>, 3> offsets;
  if (crop_h == img_h) {
    int w_step = (img_w - crop_w) / 2;
    offsets = {{{0, 0}, {2 * w_step, 0}, {w_step, 0}}};
  } else if (crop_w == img_w) {
    int h_step = (img_h - crop_h) / 2;
    offsets = {{{0, 0}, {0, 2 * h_step}, {0, h_step}}};
  }
  vector<Tensor> cropped;
  cropped.reserve(3);
  for (const auto& [offx, offy] : offsets) {
    int y1 = offy;
    int y2 = offy + crop_h - 1;
    int x1 = offx;
    int x2 = offx + crop_w - 1;
    OUTCOME_TRY(auto dst_tensor, CropImage(tensor, y1, x1, y2, x2));
    cropped.push_back(std::move(dst_tensor));
  }

  output["imgs"] = Value{};
  for (int i = 0; i < cropped.size(); i++) {
    output["imgs"].push_back(cropped[i]);
    output["__data__"].push_back(std::move(cropped[i]));
  }

  return output;
}

ThreeCrop::ThreeCrop(const Value& args, int version) : Transform(args) {
  auto impl_creator = Registry<ThreeCropImpl>::Get().GetCreator(specified_platform_, version);
  if (nullptr == impl_creator) {
    MMDEPLOY_ERROR("'ThreeCrop' is not supported on '{}' platform", specified_platform_);
    throw std::domain_error("'ThreeCrop' is not supported on specified platform");
  }
  impl_ = impl_creator->Create(args);
}

class ThreeCropCreator : public Creator<Transform> {
 public:
  const char* GetName() const override { return "ThreeCrop"; }
  int GetVersion() const override { return version_; }
  ReturnType Create(const Value& args) override {
    return std::make_unique<ThreeCrop>(args, version_);
  }

 private:
  int version_{1};
};

REGISTER_MODULE(Transform, ThreeCropCreator);
MMDEPLOY_DEFINE_REGISTRY(ThreeCropImpl);

}  // namespace mmdeploy
@@ -0,0 +1,48 @@
// Copyright (c) OpenMMLab. All rights reserved.

#ifndef MMDEPLOY_THREE_CROP_H
#define MMDEPLOY_THREE_CROP_H

#include <array>

#include "mmdeploy/core/tensor.h"
#include "transform.h"

namespace mmdeploy {

class MMDEPLOY_API ThreeCropImpl : public TransformImpl {
 public:
  explicit ThreeCropImpl(const Value& args);
  ~ThreeCropImpl() override = default;

  Result<Value> Process(const Value& input) override;

 protected:
  virtual Result<Tensor> CropImage(const Tensor& tensor, int top, int left, int bottom,
                                   int right) = 0;

 protected:
  struct three_crop_arg_t {
    std::array<int, 2> crop_size;
  };
  using ArgType = struct three_crop_arg_t;

 protected:
  ArgType arg_;
};

class MMDEPLOY_API ThreeCrop : public Transform {
 public:
  explicit ThreeCrop(const Value& args, int version = 0);
  ~ThreeCrop() override = default;

  Result<Value> Process(const Value& input) override { return impl_->Process(input); }

 protected:
  std::unique_ptr<ThreeCropImpl> impl_;
};

MMDEPLOY_DECLARE_REGISTRY(ThreeCropImpl);
}  // namespace mmdeploy

#endif  // MMDEPLOY_THREE_CROP_H
@@ -38,6 +38,7 @@ add_example(restorer c image_restorer)
 add_example(text_detector c ocr)
 add_example(pose_detector c pose_detection)
 add_example(rotated_detector c rotated_object_detection)
+add_example(video_recognizer c video_recognition)
 # TODO: figure out a better way
 #add_example("" c det_cls)
 #add_example("" c det_pose)

@@ -52,4 +53,5 @@ if (MMDEPLOY_BUILD_SDK_CXX_API)
 add_example(pose_detector cpp pose_detector)
 add_example(rotated_detector cpp rotated_detector)
 add_example(pose_detector cpp pose_tracker)
+add_example(video_recognizer cpp video_cls)
 endif ()
@@ -0,0 +1,111 @@
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <map>
#include <opencv2/imgcodecs/imgcodecs.hpp>
#include <opencv2/videoio.hpp>
#include <set>
#include <string>
#include <vector>

#include "mmdeploy/video_recognizer.h"

void SampleFrames(const char* video_path, std::map<int, cv::Mat>& buffer,
                  std::vector<mmdeploy_mat_t>& clips, int clip_len, int frame_interval = 1,
                  int num_clips = 1) {
  cv::VideoCapture cap = cv::VideoCapture(video_path);
  if (!cap.isOpened()) {
    fprintf(stderr, "failed to load video: %s\n", video_path);
    exit(1);
  }

  int num_frames = cap.get(cv::CAP_PROP_FRAME_COUNT);
  printf("num_frames %d\n", num_frames);

  // evenly distribute num_clips clips of clip_len frames over the video
  int ori_clip_len = clip_len * frame_interval;
  float avg_interval = (num_frames - ori_clip_len + 1.f) / num_clips;
  std::vector<int> frame_inds;
  for (int i = 0; i < num_clips; i++) {
    int clip_offset = i * avg_interval + avg_interval / 2.0;
    for (int j = 0; j < clip_len; j++) {
      int ind = (j * frame_interval + clip_offset) % num_frames;
      if (num_frames <= ori_clip_len - 1) {
        ind = j % num_frames;
      }
      frame_inds.push_back(ind);
    }
  }

  // decode each required frame exactly once
  std::vector<int> unique_inds(frame_inds.begin(), frame_inds.end());
  std::sort(unique_inds.begin(), unique_inds.end());
  auto last = std::unique(unique_inds.begin(), unique_inds.end());
  unique_inds.erase(last, unique_inds.end());

  int ind = 0;
  for (int i = 0; i < unique_inds.size(); i++) {
    int tid = unique_inds[i];
    cv::Mat frame;
    while (ind < tid) {
      cap.read(frame);
      ind++;
    }
    cap.read(frame);
    buffer[tid] = frame;
    ind++;
  }

  clips.resize(frame_inds.size());
  for (int i = 0; i < frame_inds.size(); i++) {
    auto& img = buffer[frame_inds[i]];
    mmdeploy_mat_t mat{
        img.data, img.rows, img.cols, 3, MMDEPLOY_PIXEL_FORMAT_BGR, MMDEPLOY_DATA_TYPE_UINT8};
    clips[i] = mat;
  }
}

int main(int argc, char* argv[]) {
  if (argc != 7) {
    fprintf(stderr,
            "usage:\n video_recognition device_name dump_model_directory video_path clip_len "
            "frame_interval num_clips\n");
    return 1;
  }
  auto device_name = argv[1];
  auto model_path = argv[2];
  auto video_path = argv[3];

  int clip_len = std::stoi(argv[4]);
  int frame_interval = std::stoi(argv[5]);
  int num_clips = std::stoi(argv[6]);

  std::map<int, cv::Mat> buffer;
  std::vector<mmdeploy_mat_t> clips;
  std::vector<mmdeploy_video_sample_info_t> clip_info;
  SampleFrames(video_path, buffer, clips, clip_len, frame_interval, num_clips);
  clip_info.push_back({clip_len, num_clips});

  mmdeploy_video_recognizer_t recognizer{};
  int status{};
  status = mmdeploy_video_recognizer_create_by_path(model_path, device_name, 0, &recognizer);
  if (status != MMDEPLOY_SUCCESS) {
    fprintf(stderr, "failed to create recognizer, code: %d\n", (int)status);
    return 1;
  }

  mmdeploy_video_recognition_t* res{};
  int* res_count{};
  status = mmdeploy_video_recognizer_apply(recognizer, clips.data(), clip_info.data(), 1, &res,
                                           &res_count);
  if (status != MMDEPLOY_SUCCESS) {
    fprintf(stderr, "failed to apply recognizer, code: %d\n", (int)status);
    return 1;
  }

  for (int i = 0; i < res_count[0]; ++i) {
    fprintf(stderr, "label: %d, score: %.4f\n", res[i].label_id, res[i].score);
  }

  mmdeploy_video_recognizer_release_result(res, res_count, 1);

  mmdeploy_video_recognizer_destroy(recognizer);

  return 0;
}
@@ -0,0 +1,90 @@

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <map>
#include <string>
#include <vector>

#include "mmdeploy/video_recognizer.hpp"
#include "opencv2/imgcodecs/imgcodecs.hpp"
#include "opencv2/videoio.hpp"

void SampleFrames(const char* video_path, std::map<int, cv::Mat>& buffer,
                  std::vector<mmdeploy::Mat>& clips, int clip_len, int frame_interval = 1,
                  int num_clips = 1) {
  cv::VideoCapture cap = cv::VideoCapture(video_path);
  if (!cap.isOpened()) {
    fprintf(stderr, "failed to load video: %s\n", video_path);
    exit(1);
  }

  int num_frames = cap.get(cv::CAP_PROP_FRAME_COUNT);
  printf("num_frames %d\n", num_frames);

  // evenly distribute num_clips clips of clip_len frames over the video
  int ori_clip_len = clip_len * frame_interval;
  float avg_interval = (num_frames - ori_clip_len + 1.f) / num_clips;
  std::vector<int> frame_inds;
  for (int i = 0; i < num_clips; i++) {
    int clip_offset = i * avg_interval + avg_interval / 2.0;
    for (int j = 0; j < clip_len; j++) {
      int ind = (j * frame_interval + clip_offset) % num_frames;
      if (num_frames <= ori_clip_len - 1) {
        ind = j % num_frames;
      }
      frame_inds.push_back(ind);
    }
  }

  // decode each required frame exactly once
  std::vector<int> unique_inds(frame_inds.begin(), frame_inds.end());
  std::sort(unique_inds.begin(), unique_inds.end());
  auto last = std::unique(unique_inds.begin(), unique_inds.end());
  unique_inds.erase(last, unique_inds.end());

  int ind = 0;
  for (int i = 0; i < unique_inds.size(); i++) {
    int tid = unique_inds[i];
    cv::Mat frame;
    while (ind < tid) {
      cap.read(frame);
      ind++;
    }
    cap.read(frame);
    buffer[tid] = frame;
    ind++;
  }

  clips.resize(frame_inds.size());
  for (int i = 0; i < frame_inds.size(); i++) {
    auto& img = buffer[frame_inds[i]];
    clips[i] = img;
  }
}

int main(int argc, char* argv[]) {
  if (argc != 7) {
    fprintf(stderr,
            "usage:\n video_cls device_name model_path video_path clip_len "
            "frame_interval num_clips\n");
    return 1;
  }
  auto device_name = argv[1];
  auto model_path = argv[2];
  auto video_path = argv[3];

  int clip_len = std::stoi(argv[4]);
  int frame_interval = std::stoi(argv[5]);
  int num_clips = std::stoi(argv[6]);

  std::map<int, cv::Mat> buffer;
  std::vector<mmdeploy::Mat> clips;
  mmdeploy::VideoSampleInfo clip_info = {clip_len, num_clips};
  SampleFrames(video_path, buffer, clips, clip_len, frame_interval, num_clips);

  mmdeploy::Model model(model_path);
  mmdeploy::VideoRecognizer recognizer(model, mmdeploy::Device{device_name, 0});

  auto res = recognizer.Apply(clips, clip_info);

  for (const auto& cls : res) {
    fprintf(stderr, "label: %d, score: %.4f\n", cls.label_id, cls.score);
  }

  return 0;
}
@@ -0,0 +1,79 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse

import cv2
from mmdeploy_python import VideoRecognizer


def parse_args():
    parser = argparse.ArgumentParser(
        description='show how to use sdk python api')
    parser.add_argument('device_name', help='name of device, cuda or cpu')
    parser.add_argument(
        'model_path',
        help='path of mmdeploy SDK model dumped by model converter')
    parser.add_argument('video_path', help='path of a video')
    parser.add_argument(
        '--clip_len',
        type=int,
        help='frames of each sampled output clip',
        default=1)
    parser.add_argument(
        '--frame_interval',
        type=int,
        help='temporal interval of adjacent sampled frames',
        default=1)
    parser.add_argument(
        '--num_clips',
        type=int,
        help='number of clips to be sampled',
        default=25)
    args = parser.parse_args()
    return args


def SampleFrames(cap, clip_len, frame_interval, num_clips):
    if not cap.isOpened():
        print('failed to load video')
        exit(-1)

    # evenly distribute num_clips clips of clip_len frames over the video
    num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    ori_clip_len = clip_len * frame_interval
    avg_interval = (num_frames - ori_clip_len + 1) / float(num_clips)
    frame_inds = []
    for i in range(num_clips):
        clip_offset = int(i * avg_interval + avg_interval / 2.0)
        for j in range(clip_len):
            ind = (j * frame_interval + clip_offset) % num_frames
            if num_frames <= ori_clip_len - 1:
                ind = j % num_frames
            frame_inds.append(ind)

    # decode each required frame exactly once
    unique_inds = sorted(list(set(frame_inds)))
    buffer = {}
    ind = 0
    for i, tid in enumerate(unique_inds):
        while ind < tid:
            _, mat = cap.read()
            ind += 1
        _, mat = cap.read()
        buffer[tid] = mat
        ind += 1

    clips = []
    for tid in frame_inds:
        clips.append(buffer[tid])
    info = (clip_len, num_clips)
    return clips, info


def main():
    args = parse_args()
    cap = cv2.VideoCapture(args.video_path)

    recognizer = VideoRecognizer(
        model_path=args.model_path, device_name=args.device_name, device_id=0)

    clips, info = SampleFrames(cap, args.clip_len, args.frame_interval,
                               args.num_clips)

    result = recognizer(clips, info)
    for label_id, score in result:
        print(label_id, score)


if __name__ == '__main__':
    main()
@@ -1812,6 +1812,76 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../
</table>
</div>

<div style="margin-left: 25px;">
<table class="docutils">
<thead>
<tr>
<th align="center" colspan="4">mmaction2</th>
<th align="center">Pytorch</th>
<th align="center">ONNXRuntime</th>
<th align="center" colspan="2">TensorRT</th>
<th align="center">PPLNN</th>
<th align="center">OpenVINO</th>
</tr>
</thead>
<tbody>
<tr>
<td align="center">model</td>
<td align="center">task</td>
<td align="center">dataset</td>
<td align="center">metrics</td>
<td align="center">fp32</td>
<td align="center">fp32</td>
<td align="center">fp32</td>
<td align="center">fp16</td>
<td align="center">fp16</td>
<td align="center">fp32</td>
</tr>
<tr>
<td align="center" rowspan="2"><a href="https://github.com/open-mmlab/mmaction2/blob/dev-1.x/configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py">TSN</a></td>
<td align="center" rowspan="2">Recognition</td>
<td align="center" rowspan="2">Kinetics-400</td>
<td align="center">top-1</td>
<td align="center">69.71</td>
<td align="center">-</td>
<td align="center">69.71</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="center">top-5</td>
<td align="center">88.75</td>
<td align="center">-</td>
<td align="center">88.75</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="center" rowspan="2"><a href="https://github.com/open-mmlab/mmaction2/blob/dev-1.x/configs/recognition/slowfast/slowfast_r50_8xb8-4x16x1-256e_kinetics400-rgb.py">SlowFast</a></td>
<td align="center" rowspan="2">Recognition</td>
<td align="center" rowspan="2">Kinetics-400</td>
<td align="center">top-1</td>
<td align="center">74.45</td>
<td align="center">-</td>
<td align="center">75.62</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="center">top-5</td>
<td align="center">91.55</td>
<td align="center">-</td>
<td align="center">92.10</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
</tbody>
</table>
</div>

## Notes

- Since some codebases, e.g. MMDet, contain datasets with images of various resolutions, the speed benchmark is obtained with static configs in MMDeploy, while the performance benchmark is obtained with dynamic ones.
@@ -0,0 +1,190 @@
# MMAction2 Deployment

- [MMAction2 Deployment](#mmaction2-deployment)
  - [Installation](#installation)
    - [Install mmaction2](#install-mmaction2)
    - [Install mmdeploy](#install-mmdeploy)
  - [Convert model](#convert-model)
    - [Convert video recognition model](#convert-video-recognition-model)
  - [Model specification](#model-specification)
  - [Model Inference](#model-inference)
    - [Backend model inference](#backend-model-inference)
    - [SDK model inference](#sdk-model-inference)
      - [Video recognition SDK model inference](#video-recognition-sdk-model-inference)
  - [Supported models](#supported-models)

______________________________________________________________________

[MMAction2](https://github.com/open-mmlab/mmaction2) is an open-source toolbox for video understanding based on PyTorch. It is a part of the [OpenMMLab](https://openmmlab.com) project.

## Installation

### Install mmaction2

Please follow the [installation guide](https://github.com/open-mmlab/mmaction2/tree/dev-1.x#installation) to install mmaction2.

### Install mmdeploy

There are several methods to install mmdeploy, among which you can choose an appropriate one according to your target platform and device.

**Method I:** Install the precompiled package

You can download the latest release package from [here](https://github.com/open-mmlab/mmdeploy/releases).

**Method II:** Build using scripts

If your target platform is **Ubuntu 18.04 or a later version**, we encourage you to run the
[scripts](../01-how-to-build/build_from_script.md). For example, the following commands install mmdeploy as well as the inference engine `ONNX Runtime`.

```shell
git clone --recursive -b dev-1.x https://github.com/open-mmlab/mmdeploy.git
cd mmdeploy
python3 tools/scripts/build_ubuntu_x64_ort.py $(nproc)
export PYTHONPATH=$(pwd)/build/lib:$PYTHONPATH
export LD_LIBRARY_PATH=$(pwd)/../mmdeploy-dep/onnxruntime-linux-x64-1.8.1/lib/:$LD_LIBRARY_PATH
```

**Method III:** Build from source

If neither **I** nor **II** meets your requirements, [building mmdeploy from source](../01-how-to-build/build_from_source.md) is the last option.

## Convert model

You can use [tools/deploy.py](https://github.com/open-mmlab/mmdeploy/blob/dev-1.x/tools/deploy.py) to convert mmaction2 models to the specified backend models. Its detailed usage can be learned from [here](https://github.com/open-mmlab/mmdeploy/blob/master/docs/en/02-how-to-run/convert_model.md#usage).

When using `tools/deploy.py`, it is crucial to specify the correct deployment config. We've already provided builtin deployment config [files](https://github.com/open-mmlab/mmdeploy/tree/dev-1.x/configs/mmaction) of all supported backends for mmaction2, under which the config file path follows the pattern:

```
{task}/{task}_{backend}-{precision}_{static | dynamic}_{shape}.py
```

where:

- **{task}:** task in mmaction2.
- **{backend}:** inference backend, such as onnxruntime, tensorrt, pplnn, ncnn, openvino, coreml etc.
- **{precision}:** fp16, int8. When it's empty, it means fp32.
- **{static | dynamic}:** static shape or dynamic shape.
- **{shape}:** input shape or shape range of a model.
- **{2d/3d}:** model type.
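For instance, the builtin config used later in this document decomposes as follows:

```
video-recognition/video-recognition_2d_onnxruntime_static.py
  {task}: video-recognition   {2d/3d}: 2d   {backend}: onnxruntime
  {precision}: empty (fp32)   {static | dynamic}: static
```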
In the next part, we will take the `tsn` model from the `video recognition` task as an example, showing how to convert it to an ONNX model that can be inferred by ONNX Runtime.

### Convert video recognition model

```shell
cd mmdeploy

# download tsn model from mmaction2 model zoo
mim download mmaction2 --config tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb --dest .

# convert mmaction2 model to onnxruntime model with static shape
python tools/deploy.py \
    configs/mmaction/video-recognition/video-recognition_2d_onnxruntime_static.py \
    tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py \
    tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20220906-cd10898e.pth \
    tests/data/arm_wrestling.mp4 \
    --work-dir mmdeploy_models/mmaction/tsn/ort \
    --device cpu \
    --show \
    --dump-info
```

## Model specification

Before moving on to the model inference chapter, let's take a closer look at the converted model structure, which is very important for model inference.

The converted model is located in the working directory, e.g. `mmdeploy_models/mmaction/tsn/ort` in the previous example. It includes:

```
mmdeploy_models/mmaction/tsn/ort
├── deploy.json
├── detail.json
├── end2end.onnx
└── pipeline.json
```

in which,

- **end2end.onnx**: the backend model, which can be inferred by ONNX Runtime
- **\*.json**: the necessary meta information for the mmdeploy SDK

The whole package **mmdeploy_models/mmaction/tsn/ort** is defined as an **mmdeploy SDK model**, i.e., an **mmdeploy SDK model** includes both the backend model and the inference meta information.
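Since `pipeline.json` is plain JSON, you can inspect what the SDK will execute before running it. A minimal sketch, assuming the working directory produced by the conversion example above:

```python
import json

# pipeline.json describes the preprocess -> inference -> postprocess graph
# that the mmdeploy SDK executes for this model
with open('mmdeploy_models/mmaction/tsn/ort/pipeline.json') as f:
    pipeline = json.load(f)
print(json.dumps(pipeline, indent=2))
```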
## Model Inference

### Backend model inference

Take the previously converted `end2end.onnx` model of `tsn` as an example; you can use the following code to infer the model and visualize the results.

```python
from mmdeploy.apis.utils import build_task_processor
from mmdeploy.utils import get_input_shape, load_config
import numpy as np
import torch

deploy_cfg = 'configs/mmaction/video-recognition/video-recognition_2d_onnxruntime_static.py'
model_cfg = 'tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py'
device = 'cpu'
backend_model = ['./mmdeploy_models/mmaction/tsn/ort/end2end.onnx']
image = 'tests/data/arm_wrestling.mp4'

# read deploy_cfg and model_cfg
deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg)

# build task and backend model
task_processor = build_task_processor(model_cfg, deploy_cfg, device)
model = task_processor.build_backend_model(backend_model)

# process input image
input_shape = get_input_shape(deploy_cfg)
model_inputs, _ = task_processor.create_input(image, input_shape)

# do model inference
with torch.no_grad():
    result = model.test_step(model_inputs)

# show top5-results
pred_scores = result[0].pred_scores.item.tolist()
top_index = np.argsort(pred_scores)[::-1]
for i in range(5):
    index = top_index[i]
    print(index, pred_scores[index])
```

### SDK model inference

Given the above SDK model of `tsn`, you can also perform SDK model inference as follows.

#### Video recognition SDK model inference

```python
from mmdeploy_python import VideoRecognizer
import cv2

# refer to demo/python/video_recognition.py
# def SampleFrames(cap, clip_len, frame_interval, num_clips):
#     ...

cap = cv2.VideoCapture('tests/data/arm_wrestling.mp4')

clips, info = SampleFrames(cap, 1, 1, 25)

# create a recognizer
recognizer = VideoRecognizer(model_path='./mmdeploy_models/mmaction/tsn/ort', device_name='cpu', device_id=0)
# perform inference
result = recognizer(clips, info)
# show inference result
for label_id, score in result:
    print(label_id, score)
```

Besides the Python API, the mmdeploy SDK also provides FFIs (foreign function interfaces) in other languages, such as C, C++, C#, Java and so on. You can learn their usage from the [demos](https://github.com/open-mmlab/mmdeploy/tree/dev-1.x/demo).

> MMAction2 only supports the C, C++ and Python APIs for now.

## Supported models

| Model | TorchScript | ONNX Runtime | TensorRT | ncnn | PPLNN | OpenVINO |
| :-------------------------------------------------------------------------------------------- | :---------: | :----------: | :------: | :--: | :---: | :------: |
| [TSN](https://github.com/open-mmlab/mmaction2/tree/dev-1.x/configs/recognition/tsn) | N | Y | Y | N | N | N |
| [SlowFast](https://github.com/open-mmlab/mmaction2/tree/dev-1.x/configs/recognition/slowfast) | N | Y | Y | N | N | N |
@@ -1807,6 +1807,76 @@ GPU: ncnn, TensorRT, PPLNN
</table>
</div>

<div style="margin-left: 25px;">
<table class="docutils">
<thead>
<tr>
<th align="center" colspan="4">mmaction2</th>
<th align="center">Pytorch</th>
<th align="center">ONNXRuntime</th>
<th align="center" colspan="2">TensorRT</th>
<th align="center">PPLNN</th>
<th align="center">OpenVINO</th>
</tr>
</thead>
<tbody>
<tr>
<td align="center">model</td>
<td align="center">task</td>
<td align="center">dataset</td>
<td align="center">metrics</td>
<td align="center">fp32</td>
<td align="center">fp32</td>
<td align="center">fp32</td>
<td align="center">fp16</td>
<td align="center">fp16</td>
<td align="center">fp32</td>
</tr>
<tr>
<td align="center" rowspan="2"><a href="https://github.com/open-mmlab/mmaction2/blob/dev-1.x/configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py">TSN</a></td>
<td align="center" rowspan="2">Recognition</td>
<td align="center" rowspan="2">Kinetics-400</td>
<td align="center">top-1</td>
<td align="center">69.71</td>
<td align="center">-</td>
<td align="center">69.71</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="center">top-5</td>
<td align="center">88.75</td>
<td align="center">-</td>
<td align="center">88.75</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="center" rowspan="2"><a href="https://github.com/open-mmlab/mmaction2/blob/dev-1.x/configs/recognition/slowfast/slowfast_r50_8xb8-4x16x1-256e_kinetics400-rgb.py">SlowFast</a></td>
<td align="center" rowspan="2">Recognition</td>
<td align="center" rowspan="2">Kinetics-400</td>
<td align="center">top-1</td>
<td align="center">74.45</td>
<td align="center">-</td>
<td align="center">75.62</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="center">top-5</td>
<td align="center">91.55</td>
<td align="center">-</td>
<td align="center">92.10</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
</tbody>
</table>
</div>

## Notes

- Since some codebases, e.g. MMDet, contain datasets with images of various resolutions, the speed benchmark is obtained with static configs in MMDeploy, while the performance benchmark is obtained with dynamic ones.
@@ -0,0 +1,193 @@
# MMAction2 Model Deployment

- [MMAction2 Model Deployment](#mmaction2-model-deployment)
  - [Installation](#installation)
    - [Install mmaction2](#install-mmaction2)
    - [Install mmdeploy](#install-mmdeploy)
  - [Model conversion](#model-conversion)
    - [Convert video recognition model](#convert-video-recognition-model)
  - [Model specification](#model-specification)
  - [Model inference](#model-inference)
    - [Backend model inference](#backend-model-inference)
    - [SDK model inference](#sdk-model-inference)
      - [Video recognition SDK model inference](#video-recognition-sdk-model-inference)
  - [Supported models](#supported-models)

______________________________________________________________________

[MMAction2](https://github.com/open-mmlab/mmaction2) is an open-source video understanding toolbox based on PyTorch and a member of the [OpenMMLab](https://openmmlab.com) project.

## Installation

### Install mmaction2

Please refer to the [official installation guide](https://github.com/open-mmlab/mmaction2/tree/dev-1.x#installation).

### Install mmdeploy

mmdeploy can be installed in the following ways:

**Method I:** Install the precompiled package

Get the latest precompiled package via this [link](https://github.com/open-mmlab/mmdeploy/releases).

**Method II:** One-step script installation

If the deployment platform is **Ubuntu 18.04 or later**, please follow the [script installation instructions](../01-how-to-build/build_from_script.md) to complete the installation.
For example, the following commands install mmdeploy together with the matching inference engine `ONNX Runtime`.

```shell
git clone --recursive -b dev-1.x https://github.com/open-mmlab/mmdeploy.git
cd mmdeploy
python3 tools/scripts/build_ubuntu_x64_ort.py $(nproc)
export PYTHONPATH=$(pwd)/build/lib:$PYTHONPATH
export LD_LIBRARY_PATH=$(pwd)/../mmdeploy-dep/onnxruntime-linux-x64-1.8.1/lib/:$LD_LIBRARY_PATH
```

**Method III:** Build from source

If neither method I nor II meets your needs, please follow the [build-from-source instructions](../01-how-to-build/build_from_source.md) to install mmdeploy and the required inference engines.

## Model conversion

You can use [tools/deploy.py](https://github.com/open-mmlab/mmdeploy/blob/dev-1.x/tools/deploy.py) to convert mmaction2 models to backend models in one step.
Detailed usage of the tool is documented [here](https://github.com/open-mmlab/mmdeploy/blob/master/docs/en/02-how-to-run/convert_model.md#usage).

One key to the conversion is using the correct config file. The project ships builtin deployment [config files](https://github.com/open-mmlab/mmdeploy/tree/dev-1.x/configs/mmaction) for all supported backends.
Their naming pattern is:

```
{task}/{task}_{backend}-{precision}_{static | dynamic}_{shape}.py
```

where:

- **{task}:** the task in mmaction2
- **{backend}:** the inference backend, e.g. onnxruntime, tensorrt, pplnn, ncnn, openvino, coreml, etc.
- **{precision}:** the inference precision, e.g. fp16, int8. Empty means fp32
- **{static | dynamic}:** static or dynamic shape
- **{shape}:** the input shape or shape range of the model
- **{2d/3d}:** the model type

Below, we demonstrate how to convert the `tsn` model of the video classification task to an ONNX model.

### Convert video recognition model

```shell
cd mmdeploy

# download tsn model from mmaction2 model zoo
mim download mmaction2 --config tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb --dest .

# convert mmaction2 model to onnxruntime model with static shape
python tools/deploy.py \
    configs/mmaction/video-recognition/video-recognition_2d_onnxruntime_static.py \
    tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py \
    tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20220906-cd10898e.pth \
    tests/data/arm_wrestling.mp4 \
    --work-dir mmdeploy_models/mmaction/tsn/ort \
    --device cpu \
    --show \
    --dump-info
```

## Model specification

Before using the converted model for inference, it is necessary to understand the structure of the conversion result. It is stored under the path specified by `--work-dir`.

The `mmdeploy_models/mmaction/tsn/ort` directory from the example above has the following structure:

```
mmdeploy_models/mmaction/tsn/ort
├── deploy.json
├── detail.json
├── end2end.onnx
└── pipeline.json
```

The important parts are:

- **end2end.onnx**: the inference engine file, which can be inferred by ONNX Runtime
- **\*.json**: the meta information required by mmdeploy SDK inference

The whole folder is defined as an **mmdeploy SDK model**. In other words, an **mmdeploy SDK model** includes both the inference engine and the inference meta information.

## Model inference

### Backend model inference

Taking the `end2end.onnx` produced by the conversion above as an example, you can run inference with the following code:

```python
from mmdeploy.apis.utils import build_task_processor
from mmdeploy.utils import get_input_shape, load_config
import numpy as np
import torch

deploy_cfg = 'configs/mmaction/video-recognition/video-recognition_2d_onnxruntime_static.py'
model_cfg = 'tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py'
device = 'cpu'
backend_model = ['./mmdeploy_models/mmaction/tsn/ort/end2end.onnx']
image = 'tests/data/arm_wrestling.mp4'

# read deploy_cfg and model_cfg
deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg)

# build task and backend model
task_processor = build_task_processor(model_cfg, deploy_cfg, device)
model = task_processor.build_backend_model(backend_model)

# process input image
input_shape = get_input_shape(deploy_cfg)
model_inputs, _ = task_processor.create_input(image, input_shape)

# do model inference
with torch.no_grad():
    result = model.test_step(model_inputs)

# show top5-results
pred_scores = result[0].pred_scores.item.tolist()
top_index = np.argsort(pred_scores)[::-1]
for i in range(5):
    index = top_index[i]
    print(index, pred_scores[index])
```

### SDK model inference

You can also run inference on the SDK model with the following code:

#### Video recognition SDK model inference

```python
from mmdeploy_python import VideoRecognizer
import cv2

# refer to demo/python/video_recognition.py
# def SampleFrames(cap, clip_len, frame_interval, num_clips):
#     ...

cap = cv2.VideoCapture('tests/data/arm_wrestling.mp4')

clips, info = SampleFrames(cap, 1, 1, 25)

# create a recognizer
recognizer = VideoRecognizer(model_path='./mmdeploy_models/mmaction/tsn/ort', device_name='cpu', device_id=0)
# perform inference
result = recognizer(clips, info)
# show inference result
for label_id, score in result:
    print(label_id, score)
```

Besides the Python API, the mmdeploy SDK also provides multi-language interfaces such as C, C++, C# and Java.
You can refer to the [examples](https://github.com/open-mmlab/mmdeploy/tree/dev-1.x/demo) to learn how to use the other language interfaces.

> The C# and Java interfaces for mmaction2 are yet to be developed.

## Supported models

| Model | TorchScript | ONNX Runtime | TensorRT | ncnn | PPLNN | OpenVINO |
| :-------------------------------------------------------------------------------------------- | :---------: | :----------: | :------: | :--: | :---: | :------: |
| [TSN](https://github.com/open-mmlab/mmaction2/tree/dev-1.x/configs/recognition/tsn) | N | Y | Y | N | N | N |
| [SlowFast](https://github.com/open-mmlab/mmaction2/tree/dev-1.x/configs/recognition/slowfast) | N | Y | Y | N | N | N |
@@ -164,8 +164,9 @@ def get_preprocess(deploy_cfg: mmengine.Config, model_cfg: mmengine.Config,
     for transform in transforms:
         if transform['type'] == 'Normalize':
             transform['to_float'] = False
-    assert transforms[0]['type'] == 'LoadImageFromFile', 'The first item'\
-        ' type of pipeline should be LoadImageFromFile'
+    if transforms[0]['type'] != 'Lift':
+        assert transforms[0]['type'] == 'LoadImageFromFile', \
+            'The first item type of pipeline should be LoadImageFromFile'
     return dict(
         type='Task',
         module='Transform',
@@ -244,7 +245,8 @@ def get_pipeline(deploy_cfg: mmengine.Config, model_cfg: mmengine.Config,
     task = get_task_type(deploy_cfg)
     input_names = preprocess['input']
     output_names = postprocess['output']
-    if task == Task.CLASSIFICATION or task == Task.SUPER_RESOLUTION:
+    if task == Task.CLASSIFICATION or task == Task.SUPER_RESOLUTION \
+            or task == Task.VIDEO_RECOGNITION:
         postprocess['input'] = infer_info['output']
     else:
         postprocess['input'] = preprocess['output'] + infer_info['output']
@@ -111,11 +111,16 @@ class VideoRecognition(BaseTask):
             nn.Module: An initialized backend model.
         """
         from .video_recognition_model import build_video_recognition_model
+        data_preprocessor = self.model_cfg.model.data_preprocessor
+        data_preprocessor.setdefault('type', 'mmaction.ActionDataPreprocessor')
         model = build_video_recognition_model(
-            model_files, self.model_cfg, self.deploy_cfg, device=self.device)
+            model_files,
+            self.model_cfg,
+            self.deploy_cfg,
+            device=self.device,
+            data_preprocessor=data_preprocessor)
         model.to(self.device)
-        model.eval()
-        return model
+        return model.eval()

     def create_input(self,
                      imgs: Union[str, np.ndarray],

@@ -242,7 +247,7 @@ class VideoRecognition(BaseTask):
         """
         return input_data['inputs']

-    def get_preprocess(self) -> Dict:
+    def get_preprocess(self, *args, **kwargs) -> Dict:
         """Get the preprocess information for SDK.

         Return:

@@ -250,19 +255,70 @@ class VideoRecognition(BaseTask):
         """
         input_shape = get_input_shape(self.deploy_cfg)
         model_cfg = process_model_config(self.model_cfg, [''], input_shape)
-        preprocess = model_cfg.test_pipeline
-        return preprocess
+        pipeline = model_cfg.test_pipeline
+        data_preprocessor = self.model_cfg.model.data_preprocessor

-    def get_postprocess(self) -> Dict:
+        lift = dict(type='Lift', transforms=[])
+        lift['transforms'].append(dict(type='LoadImageFromFile'))
+        transforms2index = {}
+        for i, trans in enumerate(pipeline):
+            transforms2index[trans['type']] = i
+        lift_key = [
+            'Resize', 'Normalize', 'TenCrop', 'ThreeCrop', 'CenterCrop'
+        ]
+        for key in lift_key:
+            if key == 'Normalize':
+                assert key not in transforms2index
+                mean = data_preprocessor.get('mean', [0, 0, 0])
+                std = data_preprocessor.get('std', [1, 1, 1])
+                trans = dict(type='Normalize', mean=mean, std=std, to_rgb=True)
+                lift['transforms'].append(trans)
+            if key in transforms2index:
+                index = transforms2index[key]
+                if key == 'Resize' and 'scale' in pipeline[index]:
+                    value = pipeline[index].pop('scale')
+                    if len(value) == 2 and value[0] == -1:
+                        value = value[::-1]
+                    pipeline[index]['size'] = value
+                lift['transforms'].append(pipeline[index])
+
+        meta_keys = [
+            'valid_ratio', 'flip', 'img_norm_cfg', 'filename', 'ori_shape',
+            'pad_shape', 'img_shape', 'flip_direction', 'scale_factor',
+            'ori_filename'
+        ]
+        other = []
+        must_key = ['FormatShape', 'PackActionInputs']
+        for key in must_key:
+            assert key in transforms2index
+            index = transforms2index[key]
+            if key == 'PackActionInputs':
+                if 'meta_keys' in pipeline[index]:
+                    meta_keys += pipeline[index]['meta_keys']
+                pipeline[index]['meta_keys'] = list(set(meta_keys))
+                pipeline[index]['keys'] = ['img']
+                pipeline[index]['type'] = 'Collect'
+            other.append(pipeline[index])
+
+        reorder = [lift, *other]
+        return reorder
+
+    def get_postprocess(self, *args, **kwargs) -> Dict:
         """Get the postprocess information for SDK.

         Return:
             dict: Composed of the postprocess information.
         """
-        postprocess = self.model_cfg.model.cls_head
+        assert 'cls_head' in self.model_cfg.model
+        assert 'num_classes' in self.model_cfg.model.cls_head
+        logger = get_root_logger()
+        logger.warning('use default top-k value 1')
+        num_classes = self.model_cfg.model.cls_head.num_classes
+        params = dict(topk=1, num_classes=num_classes)
+        postprocess = dict(type='BaseHead', params=params)
         return postprocess

-    def get_model_name(self) -> str:
+    def get_model_name(self, *args, **kwargs) -> str:
         """Get the model name.

         Return:
@@ -9,6 +9,7 @@ from mmengine import Config
 from mmengine.model import BaseDataPreprocessor
 from mmengine.registry import Registry
 from mmengine.structures import BaseDataElement, LabelData
+from torch import nn

 from mmdeploy.codebase.base import BaseBackendModel
 from mmdeploy.utils import (Backend, get_backend, get_codebase_config,

@@ -37,19 +38,11 @@ class End2EndModel(BaseBackendModel):
                  backend_files: Sequence[str],
                  device: str,
                  deploy_cfg: Union[str, Config] = None,
                  model_cfg: Union[str, Config] = None,
+                 data_preprocessor: Optional[Union[dict, nn.Module]] = None,
                  **kwargs):
-        super(End2EndModel, self).__init__(deploy_cfg=deploy_cfg)
-        model_cfg, deploy_cfg = load_config(model_cfg, deploy_cfg)
-        from mmaction.registry import MODELS
-        preprocessor_cfg = model_cfg.model.get('data_preprocessor', None)
-        if preprocessor_cfg is not None:
-            self.data_preprocessor = MODELS.build(
-                model_cfg.model.data_preprocessor)
-        else:
-            self.data_preprocessor = BaseDataPreprocessor()
+        super(End2EndModel, self).__init__(
+            deploy_cfg=deploy_cfg, data_preprocessor=data_preprocessor)
         self.deploy_cfg = deploy_cfg
         self.model_cfg = model_cfg
         self._init_wrapper(
             backend=backend,
             backend_files=backend_files,

@@ -114,10 +107,14 @@ class End2EndModel(BaseBackendModel):
         return data_samples


-def build_video_recognition_model(model_files: Sequence[str],
-                                  model_cfg: Union[str, mmengine.Config],
-                                  deploy_cfg: Union[str, mmengine.Config],
-                                  device: str, **kwargs):
+def build_video_recognition_model(
+        model_files: Sequence[str],
+        model_cfg: Union[str, mmengine.Config],
+        deploy_cfg: Union[str, mmengine.Config],
+        device: str,
+        data_preprocessor: Optional[Union[Config,
+                                          BaseDataPreprocessor]] = None,
+        **kwargs):
     """Build video recognition model for different backends.

     Args:

@@ -127,6 +124,8 @@ def build_video_recognition_model(model_files: Sequence[str],
         deploy_cfg (str | mmengine.Config): Input deployment config file or
             Config object.
         device (str): Device to input model.
+        data_preprocessor (BaseDataPreprocessor | Config): The data
+            preprocessor of the model.

     Returns:
         BaseBackendModel: Video recognizer for a configured backend.

@@ -144,7 +143,7 @@ def build_video_recognition_model(model_files: Sequence[str],
             backend_files=model_files,
             device=device,
             deploy_cfg=deploy_cfg,
             model_cfg=model_cfg,
+            data_preprocessor=data_preprocessor,
             **kwargs))

     return backend_video_recognizer
@@ -85,7 +85,9 @@ SDK_TASK_MAP = {
     Task.POSE_DETECTION:
     dict(component='Detector', cls_name='PoseDetector'),
     Task.ROTATED_DETECTION:
-    dict(component='ResizeRBBox', cls_name='RotatedDetector')
+    dict(component='ResizeRBBox', cls_name='RotatedDetector'),
+    Task.VIDEO_RECOGNITION:
+    dict(component='BaseHead', cls_name='VideoRecognizer')
 }

 TENSORRT_MAX_TOPK = 3840
Binary file not shown.

@@ -771,7 +771,7 @@ def get_backend_result(pipeline_info: dict, model_cfg_path: Path,

     if sdk_config is not None:

-        if codebase_name == 'mmcls':
+        if codebase_name == 'mmcls' or codebase_name == 'mmaction':
             replace_top_in_pipeline_json(backend_output_path, logger)

         log_path = gen_log_path(