mirror of https://github.com/open-mmlab/mmcv.git
[NPU] add npu ops and check (#3267)
* add npu ops and check
* lint fix
* lint fix 2
* delete and fix
* fix 3
parent cf1c168fcb
commit 6d33b9f650
@@ -21,3 +21,24 @@ void assign_score_withk_forward_impl(int B, int N0, int N1, int M, int K, int O,

REGISTER_NPU_IMPL(assign_score_withk_forward_impl,
                  assign_score_withk_forward_npu);

void assign_score_withk_backward_npu(
    int B, int N0, int N1, int M, int K, int O, int aggregate,
    const Tensor& grad_out, const Tensor& points, const Tensor& centers,
    const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points,
    Tensor& grad_centers, Tensor& grad_scores) {
  at::Tensor grad_out_trans = grad_out.permute({0, 2, 3, 1});

  EXEC_NPU_CMD(aclnnAssignScoreWithkGrad, grad_out_trans, points, centers,
               scores, knn_idx, B, N0, N1, M, K, O, aggregate, grad_scores,
               grad_points, grad_centers);
}

void assign_score_withk_backward_impl(
    int B, int N0, int N1, int M, int K, int O, int aggregate,
    const Tensor& grad_out, const Tensor& points, const Tensor& centers,
    const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points,
    Tensor& grad_centers, Tensor& grad_scores);

REGISTER_NPU_IMPL(assign_score_withk_backward_impl,
                  assign_score_withk_backward_npu);
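Aside: REGISTER_NPU_IMPL ties the device-agnostic `*_impl` entry points to these NPU kernels through mmcv's device registry. A minimal sketch of that name-keyed dispatch idea, assuming nothing about the real macro beyond load-time registration (the demo types and names below are hypothetical, not mmcv's actual API):

// Simplified stand-in for REGISTER_NPU_IMPL: a name-keyed kernel registry.
// This is an illustrative assumption, not mmcv's real implementation.
#include <functional>
#include <iostream>
#include <map>
#include <string>

using Kernel = std::function<void(int)>;  // stand-in for the real signature

std::map<std::string, Kernel>& registry() {
  static std::map<std::string, Kernel> r;
  return r;
}

// Analogue of REGISTER_NPU_IMPL(key, kernel): record the kernel at load time.
struct Registrar {
  Registrar(const std::string& key, Kernel k) {
    registry()[key] = std::move(k);
  }
};

void assign_score_withk_forward_npu_demo(int B) {
  std::cout << "NPU kernel called with B=" << B << "\n";
}

static Registrar reg("assign_score_withk_forward_impl",
                     assign_score_withk_forward_npu_demo);

int main() {
  // The *_impl entry point looks up the registered device kernel and calls it.
  registry()["assign_score_withk_forward_impl"](4);
}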
@@ -1,66 +0,0 @@
#include "pytorch_npu_helper.hpp"

using namespace NPU_NAME_SPACE;
using namespace std;

vector<vector<float>> pixel_group_npu(Tensor score, Tensor mask,
                                      Tensor embedding, Tensor kernel_label,
                                      Tensor kernel_contour,
                                      int kernel_region_num,
                                      float distance_threshold) {
  TORCH_CHECK(score.dim() == 2,
              "score.dim() must be 2, but got: ", score.dim());
  TORCH_CHECK(mask.dim() == 2, "mask.dim() must be 2, but got: ", mask.dim());
  TORCH_CHECK(embedding.dim() == 3,
              "embedding.dim() must be 3, but got: ", embedding.dim());
  TORCH_CHECK(kernel_label.dim() == 2,
              "kernel_label.dim() must be 2, but got: ", kernel_label.dim());
  TORCH_CHECK(
      kernel_contour.dim() == 2,
      "kernel_contour.dim() must be 2, but got: ", kernel_contour.dim());

  auto label_size = kernel_label.sizes();
  auto height = label_size[0];
  auto width = label_size[1];

  c10::SmallVector<int64_t, 8> point_vector_size = {kernel_region_num, 2};
  c10::SmallVector<int64_t, 8> label_updated_size = {height, width};
  at::Tensor point_vector = at::zeros(point_vector_size, score.options());
  at::Tensor label_updated =
      at::empty(label_updated_size, kernel_label.options());

  EXEC_NPU_CMD(aclnnPixelGroup, score, mask, embedding, kernel_label,
               kernel_contour, kernel_region_num, distance_threshold,
               point_vector, label_updated);

  std::vector<std::vector<float>> pixel_assignment(kernel_region_num);
  at::Tensor point_vector_cpu = point_vector.to(at::kCPU);
  at::Tensor label_updated_cpu = label_updated.to(at::kCPU);

  for (int32_t l = 0; l < kernel_region_num; l++) {
    pixel_assignment[l].push_back(point_vector_cpu[l][0].item<float>());
    pixel_assignment[l].push_back(point_vector_cpu[l][1].item<float>());
    if (pixel_assignment[l][1] > 0) {
      pixel_assignment[l][0] /= pixel_assignment[l][1];
    }
    if (l > 0) {
      at::Tensor valid_mask = (label_updated_cpu == l);
      at::Tensor indices = at::nonzero(valid_mask);
      for (int32_t i = 0; i < indices.size(0); i++) {
        auto x = indices[i][0].item<int32_t>();
        auto y = indices[i][1].item<int32_t>();
        pixel_assignment[l].push_back(y);
        pixel_assignment[l].push_back(x);
      }
    }
  }
  return pixel_assignment;
}

vector<vector<float>> pixel_group_impl(Tensor score, Tensor mask,
                                       Tensor embedding, Tensor kernel_label,
                                       Tensor kernel_contour,
                                       int kernel_region_num,
                                       float distance_threshold);

REGISTER_NPU_IMPL(pixel_group_impl, pixel_group_npu);
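Aside: per the removed post-processing loop above, each `pixel_assignment[l]` appears to hold the region's mean confidence and pixel count followed by coordinate pairs. A hedged consumer sketch of that inferred layout (sample values invented for illustration):

// Sketch of consuming pixel_group-style output: per region, the first two
// floats are (mean confidence, pixel count), then coordinate pairs.
// Layout inferred from the deleted code above; treat it as an assumption.
#include <iostream>
#include <vector>

int main() {
  std::vector<std::vector<float>> pixel_assignment = {
      {},                                       // region 0: background
      {0.9f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}};    // region 1: two pixels
  for (size_t l = 1; l < pixel_assignment.size(); ++l) {
    const auto& region = pixel_assignment[l];
    if (region.size() < 2) continue;
    std::cout << "region " << l << ": mean conf " << region[0] << ", "
              << region[1] << " px, coords:";
    for (size_t i = 2; i + 1 < region.size(); i += 2)
      std::cout << " (" << region[i] << "," << region[i + 1] << ")";
    std::cout << "\n";
  }
}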
@@ -8,6 +8,8 @@ void roi_align_forward_npu(Tensor input, Tensor rois, Tensor output,
                           Tensor argmax_y, Tensor argmax_x, int aligned_height,
                           int aligned_width, float spatial_scale,
                           int sampling_ratio, int pool_mode, bool aligned) {
  TORCH_CHECK(input.scalar_type() == at::kFloat,
              "input should be a float tensor");
  int64_t roi_end_mode = 2;
  if (!aligned) {
    LOG(WARNING) << "The [aligned] attr in roi_align op is false";
@@ -34,6 +36,8 @@ void roi_align_backward_npu(Tensor grad_output, Tensor rois, Tensor argmax_y,
                            int aligned_height, int aligned_width,
                            float spatial_scale, int sampling_ratio,
                            int pool_mode, bool aligned) {
  TORCH_CHECK(grad_output.scalar_type() == at::kFloat,
              "grad_output should be a float tensor");
  int64_t aligned_height_64 = aligned_height;
  int64_t aligned_width_64 = aligned_width;
  int64_t sampling_ratio_64 = sampling_ratio;
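Aside: the new `int64_t` locals widen the int attributes before they are forwarded, since a forwarding macro like EXEC_NPU_CMD passes arguments with their deduced types. A standalone sketch of that deduction difference (`takes_attr` is a hypothetical stand-in, not an aclnn API):

// Why the explicit int64_t locals: a template deduces the argument's own
// type, so a 32-bit int would reach an interface expecting a 64-bit attr.
#include <cstdint>
#include <cstdio>

template <typename T>
void takes_attr(T v) {
  // With an int argument, sizeof(v) is 4; with the widened local it is 8.
  std::printf("attr=%lld, size=%zu\n", static_cast<long long>(v), sizeof(v));
}

int main() {
  int aligned_height = 7;
  int64_t aligned_height_64 = aligned_height;  // widen before forwarding
  takes_attr(aligned_height);     // deduced as int (4 bytes)
  takes_attr(aligned_height_64);  // deduced as int64_t (8 bytes)
}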
@@ -11,6 +11,11 @@ int hard_voxelize_forward_impl(const at::Tensor &points, at::Tensor &voxels,
                               const int max_points, const int max_voxels,
                               const int NDim = 3);

void dynamic_voxelize_forward_impl(const at::Tensor &points, at::Tensor &coors,
                                   const std::vector<float> voxel_size,
                                   const std::vector<float> coors_range,
                                   const int NDim = 3);

int hard_voxelize_forward_npu(const at::Tensor &points, at::Tensor &voxels,
                              at::Tensor &coors,
                              at::Tensor &num_points_per_voxel,
@@ -53,4 +58,34 @@ int hard_voxelize_forward_npu(const at::Tensor &points, at::Tensor &voxels,
  return voxel_num_int;
}

void dynamic_voxelize_forward_npu(const at::Tensor &points, at::Tensor &coors,
                                  const std::vector<float> voxel_size,
                                  const std::vector<float> coors_range,
                                  const int NDim = 3) {
  uint32_t ptsNum = points.size(0);
  uint32_t ptsFeature = points.size(1);
  at::Tensor ptsTrans = at::transpose(points, 0, 1);
  double coors_min_x = coors_range[0];
  double coors_min_y = coors_range[1];
  double coors_min_z = coors_range[2];
  double coors_max_x = coors_range[3];
  double coors_max_y = coors_range[4];
  double coors_max_z = coors_range[5];
  double voxel_x = voxel_size[0];
  double voxel_y = voxel_size[1];
  double voxel_z = voxel_size[2];
  int grid_x = std::round((coors_max_x - coors_min_x) / voxel_x);
  int grid_y = std::round((coors_max_y - coors_min_y) / voxel_y);
  int grid_z = std::round((coors_max_z - coors_min_z) / voxel_z);

  at::Tensor tmp_coors =
      at::zeros({3, ptsNum}, points.options().dtype(at::kInt));
  EXEC_NPU_CMD(aclnnDynamicVoxelization, ptsTrans, coors_min_x, coors_min_y,
               coors_min_z, voxel_x, voxel_y, voxel_z, grid_x, grid_y, grid_z,
               tmp_coors);
  tmp_coors.transpose_(0, 1);
  coors.copy_(tmp_coors);
}

REGISTER_NPU_IMPL(hard_voxelize_forward_impl, hard_voxelize_forward_npu);
REGISTER_NPU_IMPL(dynamic_voxelize_forward_impl, dynamic_voxelize_forward_npu);
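Aside: the grid dimensions above are derived as round((max − min) / voxel_size) per axis. A standalone check of that arithmetic with illustrative KITTI-like values (the numbers are not taken from this commit):

// Verifies the grid-size arithmetic used in dynamic_voxelize_forward_npu.
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  std::vector<float> voxel_size = {0.5f, 0.5f, 4.0f};
  std::vector<float> coors_range = {0.f, -40.f, -3.f, 70.4f, 40.f, 1.f};
  int grid_x =
      static_cast<int>(std::round((coors_range[3] - coors_range[0]) / voxel_size[0]));
  int grid_y =
      static_cast<int>(std::round((coors_range[4] - coors_range[1]) / voxel_size[1]));
  int grid_z =
      static_cast<int>(std::round((coors_range[5] - coors_range[2]) / voxel_size[2]));
  std::printf("grid: %d x %d x %d\n", grid_x, grid_y, grid_z);  // 141 x 160 x 1
}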