bmcv_nms_yolo
该接口目前支持 yolov3/yolov7,用于消除网络计算得到的过多物体框,并找到最佳物体框。
处理器型号支持:
该接口支持BM1684/BM1684X。
接口形式:
bm_status_t bmcv_nms_yolo( bm_handle_t handle, int input_num, bm_device_mem_t bottom[3], int batch_num, int hw_shape[3][2], int num_classes, int num_boxes, int mask_group_size, float nms_threshold, float confidence_threshold, int keep_top_k, float bias[18], float anchor_scale[3], float mask[9], bm_device_mem_t output, int yolo_flag, int len_per_batch, void *ext)
参数说明:
bm_handle_t handle
输入参数。bm_handle 句柄。
int input_num
输入参数。输入feature map数量。
bm_device_mem_t bottom[3]
输入参数。bottom 的设备地址,需要调用 bm_mem_from_system() 将数据地址转化为 bm_device_mem_t 所对应的结构。
int batch_num
输入参数。batch 的数量。
int hw_shape[3][2]
输入参数。输入feature map的h、w。
int num_classes
输入参数。图片的类别数量。
int num_boxes
输入参数。每个网格包含多少个不同尺度的anchor box。
int mask_group_size
输入参数。掩膜的尺寸。
float nms_threshold
输入参数。过滤物体框的阈值,分数小于该阈值的物体框将会被过滤掉。
float confidence_threshold
输入参数。置信度。
int keep_top_k
输入参数。保存前 k 个数。
float bias[18]
输入参数。偏置。
float anchor_scale[3]
输入参数。anchor的尺寸。
float mask[9]
输入参数。掩膜。
bm_device_mem_t output
输出参数。输出的设备地址,需要调用 bm_mem_from_system() 将数据地址转化为 bm_device_mem_t 所对应的结构。
int yolo_flag
输入参数。yolov3时yolo_flag=0,yolov7时yolo_flag=2。
int len_per_batch
输入参数。该参数无效,仅为了维持接口的兼容性。
int scale
输入参数。目标尺寸。该参数仅在yolov7中生效。
int *orig_image_shape
输入参数。原始图片的w/h, 按batch排布,比如batch4: w1 h1 w2 h2 w3 h3 w4 h4。该参数仅在yolov7中生效。
int model_h
输入参数。模型的shape h,该参数仅在yolov7中生效。
int model_w
输入参数。模型的shape w,该参数仅在yolov7中生效。
void *ext
预留参数。如果需要新增参数,可以在这里新增。yolov7 中新增了4个参数(即上文的 scale、orig_image_shape、model_h、model_w),它们通过 ext 指向的如下结构体传入:
typedef struct yolov7_info{ int scale; int *orig_image_shape; int model_h; int model_w; } yolov7_info_t;
上面结构体中,int scale:scale_flag。int* orig_image_shape:原始图片的w/h, 按batch排布,比如batch4: w1 h1 w2 h2 w3 h3 w4 h4。int model_h:模型的shape h。int model_w:模型的shape w。这些参数仅在yolov7中生效。
返回值:
BM_SUCCESS: 成功
其他: 失败
代码示例:
#include <time.h> #include <random> #include <algorithm> #include <map> #include <vector> #include <iostream> #include <cmath> #include <getopt.h> #include "bmcv_api_ext.h" #include "bmcv_common_bm1684.h" #include "math.h" #include "stdio.h" #include "stdlib.h" #include "string.h" #include <iostream> #include <new> #include <fstream> #define KEEP_TOP_K 200 #define Dtype float #define TIME_PROFILE typedef struct yolov7_info{ int scale; int *orig_image_shape; int model_h; int model_w; } yolov7_info_t; int main(int argc, char *argv[]) { int DEV_ID = 0; int H = 16, W = 30; int bottom_num = 3; int dev_count; int f_data_from_file = 0; int f_tpu_forward = 1; bm_status_t ret = BM_SUCCESS; int batch_num = 32; int num_classes = 6; int num_boxes = 3; int yolo_flag = 0; //yolov3: 0, yolov7: 2 int len_per_batch = 0; int keep_top_k = 100; float nms_threshold = 0.1; float conf_threshold = 0.98f; int mask_group_size = 3; float bias[18] = {10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326}; float anchor_scale[3] = {32, 16, 8}; float mask[9] = {6, 7, 8, 3, 4, 5, 0, 1, 2}; int scale = 0; //for yolov7 post handle int model_h = 0; int model_w = 0; int mode_value_end = 0; bm_dev_request(&handle, 0); int hw_shape[3][2] = { {H*1, W*1}, {H*2, W*2}, {H*4, W*4}, }; int size_bottom[3]; float* data_bottom[3]; int origin_image_shape[batch_num * 2] = {0}; if (yolo_flag == 1){ num_boxes = 1; len_per_batch = 12096 * 18; bottom_num = 1; } else if (yolo_flag == 2){ //yolov7 post handle; num_boxes = 1; bottom_num = 3; mask_group_size = 1; scale = 1; model_h = 512; model_w = 960; for (int i = 0 ; i < 3; i++){ mask[i] = i; } for (int i = 0; i < 6; i++) bias[i] = 1; for (int i = 0; i < 3; i++) anchor_scale[i] = 1; for (int i = 0; i < batch_num; i++){ origin_image_shape[i*2 + 0] = 1920; origin_image_shape[i*2 + 1] = 1080; } } // alloc input data for (int i = 0; i < 3; ++i) { if (yolo_flag == 1){ size_bottom[i] = batch_num * len_per_batch; } else { size_bottom[i] = batch_num * 
num_boxes * (num_classes + 5) * hw_shape[i][0] * hw_shape[i][1]; } try { data_bottom[i] = new float[size_bottom[i]]; } catch(std::bad_alloc &memExp) { std::cerr<<memExp.what()<<std::endl; exit(-1); } } if (f_data_from_file) { #if defined(__aarch64__) #define DIR "./imgs/" #else #define DIR "test/test_api_bmdnn/bm1684/imgs/" #endif printf("reading data from: \"" DIR "\"\n"); char path[256]; if (yolo_flag == 1) { FILE* fp = fopen("./output_ref_data.dat.bmrt", "rb"); size_t cnt = fread(data_bottom[0], sizeof(float), size_bottom[0]*batch_num, fp); cnt = cnt; fclose(fp); } else { for (int i = 0; i < batch_num; ++i) { sprintf(path, DIR "b%d_13.bin", i); FILE* fp = fopen(path, "rb"); size_t cnt = fread(data_bottom[0] + i * size_bottom[0] / batch_num, sizeof(float), size_bottom[0] / batch_num, fp); cnt = cnt; fclose(fp); sprintf(path, DIR "b%d_26.bin", i); fp = fopen(path, "rb"); cnt = fread(data_bottom[1] + i * size_bottom[1] / batch_num, sizeof(float), size_bottom[1] / batch_num, fp); cnt = cnt; fclose(fp); sprintf(path, DIR "b%d_52.bin", i); fp = fopen(path, "rb"); cnt = fread(data_bottom[2] + i * size_bottom[2] / batch_num, sizeof(float), size_bottom[2] / batch_num, fp); cnt = cnt; fclose(fp); } } } else { ofstream file_1("1.txt", std::ios::out); ofstream file_2("2.txt", std::ios::out); ofstream file_3("3.txt", std::ios::out); std::random_device rd; std::mt19937 gen(rd()); std::uniform_real_distribution<> dist(0, 1); // alloc and init input data for (int j = 0; j < size_bottom[0]; ++j){ if (yolo_flag == 2){ data_bottom[0][j] = dist(gen); } else { data_bottom[0][j] = (rand() % 1000 - 999.0f) / (124.0f); } file_1 << data_bottom[0][j] <<endl; } for (int j = 0; j < size_bottom[1]; ++j){ if (yolo_flag == 2){ data_bottom[1][j] = dist(gen); } else { data_bottom[1][j] = (rand() % 1000 - 999.0f) / (124.0f); } file_2 << data_bottom[1][j] <<endl; } for (int j = 0; j < size_bottom[2]; ++j){ if (yolo_flag == 2){ data_bottom[2][j] = dist(gen); } else { data_bottom[2][j] = (rand() % 
1000 - 999.0f) / (124.0f); } file_3 << data_bottom[2][j] <<endl; } } // alloc output data float* output_bmdnn; float* output_native; try { output_bmdnn = new float[output_size]; output_native = new float[output_size]; } catch(std::bad_alloc &memExp) { std::cerr<<memExp.what()<<std::endl; exit(-1); } memset(output_bmdnn, 0, output_size * sizeof(float)); memset(output_native, 0, output_size * sizeof(float)); bm_dev_request(&handle, 0); bm_device_mem_t bottom[3] = { bm_mem_from_system((void*)data_bottom[0]), bm_mem_from_system((void*)data_bottom[1]), bm_mem_from_system((void*)data_bottom[2]) }; yolov7_info_t *ext = (yolov7_info_t*) malloc (sizeof(yolov7_info_t)); ext->scale = scale; ext->orig_image_shape = origin_image_shape; ext->model_h = model_h; ext->model_w = model_w; ret = bmcv_nms_yolo( handle, bottom_num, bottom, batch_num, hw_shape, num_classes, num_boxes, mask_group_size, nms_threshold, conf_threshold, keep_top_k, bias, anchor_scale, mask, bm_mem_from_system((void*)output_bmdnn), yolo_flag, len_per_batch, (void*)ext); return 0; }