[Feature] Add getJobLimitCapability interface and use it in nms (#2337)

Co-authored-by: budefei <budefei@cambricon.com>
pull/2345/head
bdf 2022-10-18 14:24:27 +08:00 committed by GitHub
parent bd1da5ab47
commit be684eeb4c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 11 additions and 2 deletions

View File

@@ -25,6 +25,16 @@
// Rounds x up to a multiple of y by computing ((x + y - 1) / y) * y.
// NOTE(review): presumably intended for non-negative integer x and positive
// integer y — confirm at call sites. y is expanded three times, so avoid
// passing an expression with side effects.
#define CEIL_ALIGN(x, y) (((x) + (y)-1) / (y) * (y))
inline int32_t getJobLimitCapability() {
CNcontext drv_ctx;
CNctxConfigParam ctx_conf_param;
TORCH_CHECK(
CN_SUCCESS == cnGetCtxConfigParam(drv_ctx, CN_CTX_CONFIG_UNION_LIMIT,
&ctx_conf_param),
"cnGetCtxConfigParam fails.");
return (int32_t)ctx_conf_param.unionLimit;
}
#endif // MMCV_WITH_MLU
#endif // PYTORCH_MLU_HELPER_HPP_

View File

@@ -34,8 +34,7 @@ static cnnlStatus_t policyFunc(cnrtDim3_t *k_dim, cnrtFunctionType_t *k_type,
int &core_num_per_class,
const int input_box_num) {
uint32_t core_dim = torch_mlu::getDeviceAttr(cnrtAttrMcorePerCluster);
uint32_t cluster_number = torch_mlu::getDeviceAttr(cnrtAttrClusterCount);
uint32_t job_limit = cluster_number * core_dim;
uint32_t job_limit = getJobLimitCapability();
uint32_t core_number = job_limit;
int box_num_per_core = (input_box_num + core_number - 1) / core_number;