3. PicoDet C++ ONNX Runtime inference, with C++ implementations of reshape and transpose
1. Complete ONNX C++ inference
"Complete" here refers to the model exported with Paddle's export.py and benchmark=True; the exported model has no post-processing or NMS in the graph. The inference approach can follow this repository directly: https://github.com/hpc203/picodet-onnxruntime
I made a few small modifications; the code is as follows:
#define _CRT_SECURE_NO_WARNINGS
#include <iostream>
#include <fstream>
#include <string>
#include <math.h>
#include <vector>
#include <array>
#include <algorithm>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
//#include <cuda_provider_factory.h>
#include <onnxruntime_cxx_api.h>
//using namespace cv;
//using namespace std;
//using namespace Ort;
typedef struct BoxInfo
{
float x1;
float y1;
float x2;
float y2;
float score;
int label;
} BoxInfo;
class PicoDet
{
public:
PicoDet(std::string model_path, std::string classesFile, float nms_threshold, float objThreshold);
void detect(cv::Mat& cv_image);
private:
float score_threshold = 0.5;
float nms_threshold = 0.5;
std::vector<std::string> class_names;
int num_class;
cv::Mat resize_image(cv::Mat srcimg, int* newh, int* neww, int* top, int* left);
std::vector<float> input_image_;
void normalize_(cv::Mat img);
void softmax_(const float* x, float* y, int length);
void generate_proposal(std::vector<BoxInfo>& generate_boxes, const int stride_, const float* out_score, const float* out_box);
void nms(std::vector<BoxInfo>& input_boxes);
const bool keep_ratio = false;
int inpWidth;
int inpHeight;
int num_outs;
int reg_max;
std::vector<int> stride;
//const float mean[3] = { 103.53, 116.28, 123.675 };
//const float stds[3] = { 57.375, 57.12, 58.395 };
const float mean[3] = { 0.0, 0.0, 0.0 };
const float stds[3] = { 255.0, 255.0, 255.0 };
Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "picodet");
Ort::Session* ort_session = nullptr;
Ort::SessionOptions sessionOptions = Ort::SessionOptions();
std::vector<char*> input_names;
std::vector<char*> output_names;
std::vector<std::vector<int64_t>> input_node_dims; // >=1 inputs
std::vector<std::vector<int64_t>> output_node_dims; // >=1 outputs
};
PicoDet::PicoDet(std::string model_path, std::string classesFile, float nms_threshold, float objThreshold)
{
std::ifstream ifs(classesFile.c_str());
std::string line;
while (std::getline(ifs, line)) this->class_names.push_back(line);
this->num_class = class_names.size();
this->nms_threshold = nms_threshold;
this->score_threshold = objThreshold;
std::wstring widestr = std::wstring(model_path.begin(), model_path.end());
//OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0);
sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
ort_session = new Ort::Session(env, widestr.c_str(), sessionOptions);
size_t numInputNodes = ort_session->GetInputCount();
size_t numOutputNodes = ort_session->GetOutputCount();
Ort::AllocatorWithDefaultOptions allocator;
for (int i = 0; i < numInputNodes; i++)
{
input_names.push_back(ort_session->GetInputName(i, allocator));
Ort::TypeInfo input_type_info = ort_session->GetInputTypeInfo(i);
auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
auto input_dims = input_tensor_info.GetShape();
input_node_dims.push_back(input_dims);
}
for (int i = 0; i < numOutputNodes; i++)
{
output_names.push_back(ort_session->GetOutputName(i, allocator));
Ort::TypeInfo output_type_info = ort_session->GetOutputTypeInfo(i);
auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();
auto output_dims = output_tensor_info.GetShape();
output_node_dims.push_back(output_dims);
/*for (int j = 0; j < output_dims.size(); j++)
{
cout << output_dims[j] << ",";
}
cout << endl;*/
}
this->inpHeight = input_node_dims[0][2];
this->inpWidth = input_node_dims[0][3];
this->num_outs = int(numOutputNodes * 0.5);
this->reg_max = output_node_dims[this->num_outs][output_node_dims[this->num_outs].size() - 1] / 4 - 1;
for (int i = 0; i < this->num_outs; i++)
{
stride.push_back(int(8 * pow(2, i)));
}
}
cv::Mat PicoDet::resize_image(cv::Mat srcimg, int* newh, int* neww, int* top, int* left)
{
int srch = srcimg.rows, srcw = srcimg.cols;
*newh = this->inpHeight;
*neww = this->inpWidth;
cv::Mat dstimg;
if (this->keep_ratio && srch != srcw) {
float hw_scale = (float)srch / srcw;
if (hw_scale > 1) {
*newh = this->inpHeight;
*neww = int(this->inpWidth / hw_scale);
cv::resize(srcimg, dstimg, cv::Size(*neww, *newh), 0, 0, cv::INTER_LINEAR);
*left = int((this->inpWidth - *neww) * 0.5);
copyMakeBorder(dstimg, dstimg, 0, 0, *left, this->inpWidth - *neww - *left, cv::BORDER_CONSTANT, 0);
}
else {
*newh = (int)this->inpHeight * hw_scale;
*neww = this->inpWidth;
cv::resize(srcimg, dstimg, cv::Size(*neww, *newh), 0, 0, cv::INTER_LINEAR); // the interpolation method should match the one used in training
*top = (int)(this->inpHeight - *newh) * 0.5;
copyMakeBorder(dstimg, dstimg, *top, this->inpHeight - *newh - *top, 0, 0, cv::BORDER_CONSTANT, 0);
}
}
else {
cv::resize(srcimg, dstimg, cv::Size(*neww, *newh), 0, 0, cv::INTER_LINEAR);
}
return dstimg;
}
void PicoDet::normalize_(cv::Mat img)
{
// img.convertTo(img, CV_32F);
int row = img.rows;
int col = img.cols;
this->input_image_.resize(row * col * img.channels());
for (int c = 0; c < 3; c++)
{
for (int i = 0; i < row; i++)
{
for (int j = 0; j < col; j++)
{
float pix = img.ptr<uchar>(i)[j * 3 + c];
this->input_image_[c * row * col + i * col + j] = (pix / 255.0 - mean[c] / 255.0) / (stds[c] / 255.0);
//this->input_image_[c * row * col + i * col + j] = (pix - mean[c]) / stds[c];
}
}
}
}
void PicoDet::softmax_(const float* x, float* y, int length)
{
float sum = 0;
int i = 0;
for (i = 0; i < length; i++)
{
y[i] = exp(x[i]);
sum += y[i];
}
for (i = 0; i < length; i++)
{
y[i] /= sum;
}
}
void PicoDet::generate_proposal(std::vector<BoxInfo>& generate_boxes, const int stride_, const float* out_score, const float* out_box)
{
const int num_grid_y = (int)ceil((float)this->inpHeight / stride_);
const int num_grid_x = (int)ceil((float)this->inpWidth / stride_);
std::cout << "num_grid_x=" << num_grid_x << ",num_grid_y=" << num_grid_y << std::endl;
const int reg_1max = reg_max + 1;
//std::cout << "score:" << std::endl;
for (int i = 0; i < num_grid_y; i++)
{
for (int j = 0; j < num_grid_x; j++)
{
int max_ind = 0;
float max_score = 0;
for (int k = 0; k < num_class; k++)
{
/* this indexing is for the original (un-pruned) model output */
float score = out_score[i * num_grid_x * num_class + j * num_class + k];
/* the commented-out line below is the pruned-model version (reshape and transpose removed) implemented with plain C indexing; use one of the two lines — index (i,j,k) maps to (k,i,j) */
//float score = std::sqrt(out_score[k*num_grid_y*num_grid_x+i*num_grid_x+j]);
//std::cout <<score << " ";
if (score > max_score)
{
max_score = score;
max_ind = k;
}
}
if (max_score >= score_threshold)
{
std::cout << "box:" << std::endl;
//const float* pbox = out_box + idx * reg_1max * 4;
float dis_pred[4];
float* y = new float[reg_1max];
for (int k = 0; k < 4; k++)
{
/* original model */
const float* tmp = out_box + i * num_grid_x * reg_1max * 4 + j * reg_1max * 4 + k * reg_1max;
//std::cout << "r:" << *tmp << std::endl;
/* alternative for the model without reshape/transpose */
//float* tmp = new float[reg_1max];
//for (int m = 0; m < reg_1max; m++)
//{
//tmp[m] = out_box[k * num_grid_y * num_grid_x * reg_1max + i * num_grid_x + j + m * num_grid_y * num_grid_x];
//}
//std::cout << "r:" << *tmp << std::endl;
softmax_(tmp, y, reg_1max);
float dis = 0.f;
for (int l = 0; l < reg_1max; l++)
{
dis += l * y[l];
}
dis_pred[k] = dis * stride_;
}
delete[] y;
float pb_cx = (j + 0.5f) * stride_ - 0.5;
float pb_cy = (i + 0.5f) * stride_ - 0.5;
float x0 = pb_cx - dis_pred[0];
float y0 = pb_cy - dis_pred[1];
float x1 = pb_cx + dis_pred[2];
float y1 = pb_cy + dis_pred[3];
generate_boxes.push_back(BoxInfo{ x0, y0, x1, y1, max_score, max_ind });
}
}
}
}
void PicoDet::nms(std::vector<BoxInfo>& input_boxes)
{
sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; });
std::vector<float> vArea(input_boxes.size());
for (int i = 0; i < int(input_boxes.size()); ++i)
{
vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)
* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
}
std::vector<bool> isSuppressed(input_boxes.size(), false);
for (int i = 0; i < int(input_boxes.size()); ++i)
{
if (isSuppressed[i]) { continue; }
for (int j = i + 1; j < int(input_boxes.size()); ++j)
{
if (isSuppressed[j]) { continue; }
float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1);
float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1);
float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2);
float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2);
float w = (std::max)(float(0), xx2 - xx1 + 1);
float h = (std::max)(float(0), yy2 - yy1 + 1);
float inter = w * h;
float ovr = inter / (vArea[i] + vArea[j] - inter);
if (ovr >= this->nms_threshold)
{
isSuppressed[j] = true;
}
}
}
// return post_nms;
int idx_t = 0;
input_boxes.erase(remove_if(input_boxes.begin(), input_boxes.end(), [&idx_t, &isSuppressed](const BoxInfo& f) { return isSuppressed[idx_t++]; }), input_boxes.end());
}
void PicoDet::detect(cv::Mat& srcimg)
{
int newh = 0, neww = 0, top = 0, left = 0;
cv::Mat cv_image = srcimg.clone();
cv::Mat dst = this->resize_image(cv_image, &newh, &neww, &top, &left);
this->normalize_(dst);
std::array<int64_t, 4> input_shape_{ 1, 3, this->inpHeight, this->inpWidth };
auto allocator_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
Ort::Value input_tensor_ = Ort::Value::CreateTensor<float>(allocator_info, input_image_.data(), input_image_.size(), input_shape_.data(), input_shape_.size());
std::vector<Ort::Value> ort_outputs = ort_session->Run(Ort::RunOptions{ nullptr }, &input_names[0], &input_tensor_, 1, output_names.data(), output_names.size()); // run inference
// generate proposals
std::vector<BoxInfo> generate_boxes;
for (int i = 0; i < this->num_outs; i++)
{
//auto cls_shape = this->output_node_dims[i];
const float* cls_score = ort_outputs[i].GetTensorMutableData<float>();
//std::vector<int64_t> new_cls_shape = { cls_shape[0],cls_shape[1],cls_shape[2] * cls_shape[3] };
const float* bbox_pred = ort_outputs[i + this->num_outs].GetTensorMutableData<float>();
//auto reg_shape = this->output_node_dims[i+this->num_outs];
generate_proposal(generate_boxes, stride[i], cls_score, bbox_pred);
}
// Perform non-maximum suppression to eliminate redundant overlapping boxes with lower confidences
nms(generate_boxes);
float ratioh = (float)cv_image.rows / newh;
float ratiow = (float)cv_image.cols / neww;
for (size_t i = 0; i < generate_boxes.size(); ++i)
{
int xmin = (int)std::max((generate_boxes[i].x1 - left) * ratiow, 0.f);
int ymin = (int)std::max((generate_boxes[i].y1 - top) * ratioh, 0.f);
int xmax = (int)std::min((generate_boxes[i].x2 - left) * ratiow, (float)cv_image.cols);
int ymax = (int)std::min((generate_boxes[i].y2 - top) * ratioh, (float)cv_image.rows);
rectangle(srcimg, cv::Point(xmin, ymin), cv::Point(xmax, ymax), cv::Scalar(0, 0, 255), 2);
std::string label = cv::format("%.2f", generate_boxes[i].score);
label = this->class_names[generate_boxes[i].label] + ":" + label;
putText(srcimg, label, cv::Point(xmin, ymin - 5), cv::FONT_HERSHEY_SIMPLEX, 0.75, cv::Scalar(0, 255, 0), 1);
}
}
int main()
{
PicoDet mynet("picodet_xs_320_voc_256_20230405_shape.onnx", "ball.names", 0.5, 0.5); /// choice = ["picodet_m_320_coco.onnx", "picodet_m_416_coco.onnx", "picodet_s_320_coco.onnx", "picodet_s_416_coco.onnx"]
//PicoDet mynet("Cpicodet_xs_320_voc_256_20230405_shape_sim_prune.onnx", "ball.names", 0.5, 0.5);
std::string imgpath = "test.jpg";
cv::Mat bgrimg = cv::imread(imgpath,cv::IMREAD_COLOR);
cv::Mat rgbimg;
cv::cvtColor(bgrimg,rgbimg,cv::COLOR_BGR2RGB);
mynet.detect(rgbimg);
cv::Mat resultimg;
cv::cvtColor(rgbimg, resultimg, cv::COLOR_RGB2BGR);
cv::imwrite("test_result.jpg", resultimg);
static const std::string kWinName = "Deep learning object detection in ONNXRuntime";
cv::namedWindow(kWinName, cv::WINDOW_NORMAL);
cv::imshow(kWinName, resultimg);
cv::waitKey(0);
cv::destroyAllWindows();
}
2. Inference with the pruned model
Here the reshape and transpose operators have been removed from the model, so their effect has to be re-implemented in code.
For this part we only need to add back what was pruned. Since we pruned directly from the original model, the only thing missing compared with the original is the reshape/transpose section at the end of each head, so adding that part back in code is all that is needed.
Many acceleration chips cannot handle models containing this many reshape and transpose ops, so those operators have to run on the CPU; here we pull them out separately and check the effect.
2.1 Simulating the classification head's reshape and transpose in Python (mimicking C++)
As shown in the figure, there are two heads: classification and box regression. For the classification head, the ONNX Runtime output of shape 1×c×k×k goes through reshape to 1×c×(k·k) and then transpose to 1×(k·k)×c;
for the box-regression head, 1×32×k×k → reshape → 1×32×(k·k) → transpose → 1×(k·k)×32.
k is the final feature-map size of each detection head: with a 256 input and head strides of [8, 16, 32, 64], k is [32, 16, 8, 4]. Implementing this directly in C++ was confusing at first, so I worked it out in Python first.
We use k = 4 for the experiment, with 2 classes and a batch size of 1. For Python to mimic C++, the arrays have to be flattened so that, as in C++ memory, the data is laid out contiguously.
import numpy as np
num_grid_x = 4  # width
num_grid_y = 4  # height
num_cls = 2     # number of classes
a = np.arange(num_cls*num_grid_x*num_grid_y).reshape(num_cls, num_grid_y, num_grid_x)  # pruned-model output: c,k,k
aa = a.flatten()
b = a.transpose(1, 2, 0)  # reshape + transpose
bb = b.flatten()
# a above is the pruned model's output and b is the full model's; we want to access a using
# the normal way of accessing b, thereby reproducing the reshape and transpose
for i in range(num_grid_y):
    for j in range(num_grid_x):
        for k in range(num_cls):
            print(b[i, j, k], " ", a[k, i, j])
0 0
16 16
1 1
17 17
2 2
18 18
3 3
19 19
4 4
20 20
5 5
21 21
6 6
22 22
7 7
23 23
8 8
24 24
9 9
25 25
10 10
26 26
11 11
27 27
12 12
28 28
13 13
29 29
14 14
30 30
15 15
31 31
You can see the results are identical. What happens if the output is not square? See the code below:
num_grid_x = 4  # width
num_grid_y = 3  # height
num_cls = 2     # number of classes
a = np.arange(num_cls*num_grid_x*num_grid_y).reshape(num_cls, num_grid_y, num_grid_x)  # pruned-model output: c,k,k
aa = a.flatten()
b = a.transpose(1, 2, 0)  # reshape + transpose
bb = b.flatten()
# a is the pruned model's output and b is the full model's; access a through b's usual indexing
for i in range(num_grid_y):
    for j in range(num_grid_x):
        for k in range(num_cls):
            print(b[i, j, k], " ", a[k, i, j])
0 0
12 12
1 1
13 13
2 2
14 14
3 3
15 15
4 4
16 16
5 5
17 17
6 6
18 18
7 7
19 19
8 8
20 20
9 9
21 21
10 10
22 22
11 11
23 23
Again everything matches, so we can conclude: between the k×k×c and c×k×k layouts, index (i, j, k) in the former corresponds to (k, i, j) in the latter; equivalently, c×k×k becomes k×k×c by transposing axes (0, 1, 2) to (1, 2, 0), i.e. ijk becomes kij.
Next, implement the same thing with C-style flat indexing:
num_grid_x = 4  # width
num_grid_y = 4  # height
num_cls = 2     # number of classes
a = np.arange(num_cls*num_grid_x*num_grid_y).reshape(num_cls, num_grid_y, num_grid_x)  # pruned-model output: c,k,k
aa = a.flatten()
b = a.transpose(1, 2, 0)  # reshape + transpose
bb = b.flatten()
# a is the pruned model's output and b is the full model's; now index the flat buffers the C way
for i in range(num_grid_y):
    for j in range(num_grid_x):
        for k in range(num_cls):
            tb = i*num_grid_x*num_cls + j*num_cls + k
            ta = k*num_grid_y*num_grid_x + i*num_grid_x + j
            print(bb[tb], " ", aa[ta])
0 0
16 16
1 1
17 17
2 2
18 18
3 3
19 19
4 4
20 20
5 5
21 21
6 6
22 22
7 7
23 23
8 8
24 24
9 9
25 25
10 10
26 26
11 11
27 27
12 12
28 28
13 13
29 29
14 14
30 30
15 15
31 31
For the C++ code, see PicoDet::generate_proposal above; a small standalone sketch of the same mapping follows below.
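To make the mapping concrete in C++ (this is only a minimal standalone sketch, not taken from the project code; the buffer is simply filled with 0..31 like the numpy example), the snippet below reads class scores from a pruned-model buffer laid out as C×K×K using the index arithmetic derived above:

#include <cstdio>
#include <vector>

// Minimal sketch: the buffer layout is assumed to be C x K x K, as produced by the pruned model.
// Reading the score of class c at grid cell (row i, col j) needs no reshape/transpose:
// index (i, j, c) in the K x K x C view equals (c, i, j) in the C x K x K buffer.
static float class_score(const float* out_score, int num_grid_y, int num_grid_x,
                         int num_class, int i, int j, int c)
{
    (void)num_class; // not needed for the C x K x K layout, kept only for symmetry
    return out_score[c * num_grid_y * num_grid_x + i * num_grid_x + j];
}

int main()
{
    const int ky = 4, kx = 4, nc = 2;
    std::vector<float> buf(nc * ky * kx);
    for (size_t t = 0; t < buf.size(); ++t) buf[t] = (float)t; // fill 0..31 like the numpy example
    // prints 0 16, then 1 17, then 2 18, ... — the same values as the Python output above
    for (int i = 0; i < ky; ++i)
        for (int j = 0; j < kx; ++j)
            printf("%g %g\n", class_score(buf.data(), ky, kx, nc, i, j, 0),
                              class_score(buf.data(), ky, kx, nc, i, j, 1));
    return 0;
}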
2.2 Simulating the regression head's reshape and transpose in Python
The leading 32 is fixed (32 = 4 box sides × (reg_max + 1) = 4 × 8).
num_grid_x = 4
num_grid_y = 4
# a is the pruned model's output; 32 x k x k and 4 x 8 x k x k have essentially the same memory layout
a = np.arange(4*8*num_grid_x*num_grid_y).reshape(32, num_grid_y, num_grid_x).reshape(4, 8, num_grid_y, num_grid_x)
b = a.transpose(2, 3, 0, 1)
for i in range(num_grid_y):
    for j in range(num_grid_x):
        for k in range(4):
            print(b[i, j, k], " ", a[k, :, i, j])
[ 0 16 32 48 64 80 96 112] [ 0 16 32 48 64 80 96 112]
[128 144 160 176 192 208 224 240] [128 144 160 176 192 208 224 240]
[256 272 288 304 320 336 352 368] [256 272 288 304 320 336 352 368]
[384 400 416 432 448 464 480 496] [384 400 416 432 448 464 480 496]
[ 1 17 33 49 65 81 97 113] [ 1 17 33 49 65 81 97 113]
[129 145 161 177 193 209 225 241] [129 145 161 177 193 209 225 241]
[257 273 289 305 321 337 353 369] [257 273 289 305 321 337 353 369]
[385 401 417 433 449 465 481 497] [385 401 417 433 449 465 481 497]
[ 2 18 34 50 66 82 98 114] [ 2 18 34 50 66 82 98 114]
[130 146 162 178 194 210 226 242] [130 146 162 178 194 210 226 242]
[258 274 290 306 322 338 354 370] [258 274 290 306 322 338 354 370]
[386 402 418 434 450 466 482 498] [386 402 418 434 450 466 482 498]
[ 3 19 35 51 67 83 99 115] [ 3 19 35 51 67 83 99 115]
[131 147 163 179 195 211 227 243] [131 147 163 179 195 211 227 243]
[259 275 291 307 323 339 355 371] [259 275 291 307 323 339 355 371]
[387 403 419 435 451 467 483 499] [387 403 419 435 451 467 483 499]
[ 4 20 36 52 68 84 100 116] [ 4 20 36 52 68 84 100 116]
[132 148 164 180 196 212 228 244] [132 148 164 180 196 212 228 244]
[260 276 292 308 324 340 356 372] [260 276 292 308 324 340 356 372]
[388 404 420 436 452 468 484 500] [388 404 420 436 452 468 484 500]
[ 5 21 37 53 69 85 101 117] [ 5 21 37 53 69 85 101 117]
[133 149 165 181 197 213 229 245] [133 149 165 181 197 213 229 245]
[261 277 293 309 325 341 357 373] [261 277 293 309 325 341 357 373]
[389 405 421 437 453 469 485 501] [389 405 421 437 453 469 485 501]
[ 6 22 38 54 70 86 102 118] [ 6 22 38 54 70 86 102 118]
[134 150 166 182 198 214 230 246] [134 150 166 182 198 214 230 246]
[262 278 294 310 326 342 358 374] [262 278 294 310 326 342 358 374]
[390 406 422 438 454 470 486 502] [390 406 422 438 454 470 486 502]
[ 7 23 39 55 71 87 103 119] [ 7 23 39 55 71 87 103 119]
[135 151 167 183 199 215 231 247] [135 151 167 183 199 215 231 247]
[263 279 295 311 327 343 359 375] [263 279 295 311 327 343 359 375]
[391 407 423 439 455 471 487 503] [391 407 423 439 455 471 487 503]
[ 8 24 40 56 72 88 104 120] [ 8 24 40 56 72 88 104 120]
[136 152 168 184 200 216 232 248] [136 152 168 184 200 216 232 248]
[264 280 296 312 328 344 360 376] [264 280 296 312 328 344 360 376]
[392 408 424 440 456 472 488 504] [392 408 424 440 456 472 488 504]
[ 9 25 41 57 73 89 105 121] [ 9 25 41 57 73 89 105 121]
[137 153 169 185 201 217 233 249] [137 153 169 185 201 217 233 249]
[265 281 297 313 329 345 361 377] [265 281 297 313 329 345 361 377]
[393 409 425 441 457 473 489 505] [393 409 425 441 457 473 489 505]
[ 10 26 42 58 74 90 106 122] [ 10 26 42 58 74 90 106 122]
[138 154 170 186 202 218 234 250] [138 154 170 186 202 218 234 250]
[266 282 298 314 330 346 362 378] [266 282 298 314 330 346 362 378]
[394 410 426 442 458 474 490 506] [394 410 426 442 458 474 490 506]
[ 11 27 43 59 75 91 107 123] [ 11 27 43 59 75 91 107 123]
[139 155 171 187 203 219 235 251] [139 155 171 187 203 219 235 251]
[267 283 299 315 331 347 363 379] [267 283 299 315 331 347 363 379]
[395 411 427 443 459 475 491 507] [395 411 427 443 459 475 491 507]
[ 12 28 44 60 76 92 108 124] [ 12 28 44 60 76 92 108 124]
[140 156 172 188 204 220 236 252] [140 156 172 188 204 220 236 252]
[268 284 300 316 332 348 364 380] [268 284 300 316 332 348 364 380]
[396 412 428 444 460 476 492 508] [396 412 428 444 460 476 492 508]
[ 13 29 45 61 77 93 109 125] [ 13 29 45 61 77 93 109 125]
[141 157 173 189 205 221 237 253] [141 157 173 189 205 221 237 253]
[269 285 301 317 333 349 365 381] [269 285 301 317 333 349 365 381]
[397 413 429 445 461 477 493 509] [397 413 429 445 461 477 493 509]
[ 14 30 46 62 78 94 110 126] [ 14 30 46 62 78 94 110 126]
[142 158 174 190 206 222 238 254] [142 158 174 190 206 222 238 254]
[270 286 302 318 334 350 366 382] [270 286 302 318 334 350 366 382]
[398 414 430 446 462 478 494 510] [398 414 430 446 462 478 494 510]
[ 15 31 47 63 79 95 111 127] [ 15 31 47 63 79 95 111 127]
[143 159 175 191 207 223 239 255] [143 159 175 191 207 223 239 255]
[271 287 303 319 335 351 367 383] [271 287 303 319 335 351 367 383]
[399 415 431 447 463 479 495 511] [399 415 431 447 463 479 495 511]
In the code above the array is four-dimensional, but we effectively only operate on three dimensions; the extra reshape is there just to split 32 into 4×8, i.e. 4 groups of 8 contiguous numbers (the 8 distribution bins per box side). To move this toward C code, compare the two listings below:
num_grid_x = 4
num_grid_y = 4
a = np.arange(4*8*num_grid_x*num_grid_y).reshape(32, num_grid_y, num_grid_x).reshape(4, 8, num_grid_y, num_grid_x)
aa = a.flatten()
b = a.transpose(2, 3, 0, 1)
bb = b.flatten()
for i in range(num_grid_y):
    for j in range(num_grid_x):
        for k in range(4):
            # print(b[i,j,k], " ", a[k,:,i,j])
            t1 = i*num_grid_x*32 + j*32 + k*8
            t2 = k*8*num_grid_x*num_grid_y + i*num_grid_x + j
            print(f"{bb[t1:t1+8]} {aa[t2:t2+8]}")
[ 0 16 32 48 64 80 96 112] [0 1 2 3 4 5 6 7]
[128 144 160 176 192 208 224 240] [128 129 130 131 132 133 134 135]
[256 272 288 304 320 336 352 368] [256 257 258 259 260 261 262 263]
[384 400 416 432 448 464 480 496] [384 385 386 387 388 389 390 391]
[ 1 17 33 49 65 81 97 113] [1 2 3 4 5 6 7 8]
[129 145 161 177 193 209 225 241] [129 130 131 132 133 134 135 136]
[257 273 289 305 321 337 353 369] [257 258 259 260 261 262 263 264]
[385 401 417 433 449 465 481 497] [385 386 387 388 389 390 391 392]
[ 2 18 34 50 66 82 98 114] [2 3 4 5 6 7 8 9]
[130 146 162 178 194 210 226 242] [130 131 132 133 134 135 136 137]
[258 274 290 306 322 338 354 370] [258 259 260 261 262 263 264 265]
[386 402 418 434 450 466 482 498] [386 387 388 389 390 391 392 393]
[ 3 19 35 51 67 83 99 115] [ 3 4 5 6 7 8 9 10]
[131 147 163 179 195 211 227 243] [131 132 133 134 135 136 137 138]
[259 275 291 307 323 339 355 371] [259 260 261 262 263 264 265 266]
[387 403 419 435 451 467 483 499] [387 388 389 390 391 392 393 394]
[ 4 20 36 52 68 84 100 116] [ 4 5 6 7 8 9 10 11]
[132 148 164 180 196 212 228 244] [132 133 134 135 136 137 138 139]
[260 276 292 308 324 340 356 372] [260 261 262 263 264 265 266 267]
[388 404 420 436 452 468 484 500] [388 389 390 391 392 393 394 395]
[ 5 21 37 53 69 85 101 117] [ 5 6 7 8 9 10 11 12]
[133 149 165 181 197 213 229 245] [133 134 135 136 137 138 139 140]
[261 277 293 309 325 341 357 373] [261 262 263 264 265 266 267 268]
[389 405 421 437 453 469 485 501] [389 390 391 392 393 394 395 396]
[ 6 22 38 54 70 86 102 118] [ 6 7 8 9 10 11 12 13]
[134 150 166 182 198 214 230 246] [134 135 136 137 138 139 140 141]
[262 278 294 310 326 342 358 374] [262 263 264 265 266 267 268 269]
[390 406 422 438 454 470 486 502] [390 391 392 393 394 395 396 397]
[ 7 23 39 55 71 87 103 119] [ 7 8 9 10 11 12 13 14]
[135 151 167 183 199 215 231 247] [135 136 137 138 139 140 141 142]
[263 279 295 311 327 343 359 375] [263 264 265 266 267 268 269 270]
[391 407 423 439 455 471 487 503] [391 392 393 394 395 396 397 398]
[ 8 24 40 56 72 88 104 120] [ 8 9 10 11 12 13 14 15]
[136 152 168 184 200 216 232 248] [136 137 138 139 140 141 142 143]
[264 280 296 312 328 344 360 376] [264 265 266 267 268 269 270 271]
[392 408 424 440 456 472 488 504] [392 393 394 395 396 397 398 399]
[ 9 25 41 57 73 89 105 121] [ 9 10 11 12 13 14 15 16]
[137 153 169 185 201 217 233 249] [137 138 139 140 141 142 143 144]
[265 281 297 313 329 345 361 377] [265 266 267 268 269 270 271 272]
[393 409 425 441 457 473 489 505] [393 394 395 396 397 398 399 400]
[ 10 26 42 58 74 90 106 122] [10 11 12 13 14 15 16 17]
[138 154 170 186 202 218 234 250] [138 139 140 141 142 143 144 145]
[266 282 298 314 330 346 362 378] [266 267 268 269 270 271 272 273]
[394 410 426 442 458 474 490 506] [394 395 396 397 398 399 400 401]
[ 11 27 43 59 75 91 107 123] [11 12 13 14 15 16 17 18]
[139 155 171 187 203 219 235 251] [139 140 141 142 143 144 145 146]
[267 283 299 315 331 347 363 379] [267 268 269 270 271 272 273 274]
[395 411 427 443 459 475 491 507] [395 396 397 398 399 400 401 402]
[ 12 28 44 60 76 92 108 124] [12 13 14 15 16 17 18 19]
[140 156 172 188 204 220 236 252] [140 141 142 143 144 145 146 147]
[268 284 300 316 332 348 364 380] [268 269 270 271 272 273 274 275]
[396 412 428 444 460 476 492 508] [396 397 398 399 400 401 402 403]
[ 13 29 45 61 77 93 109 125] [13 14 15 16 17 18 19 20]
[141 157 173 189 205 221 237 253] [141 142 143 144 145 146 147 148]
[269 285 301 317 333 349 365 381] [269 270 271 272 273 274 275 276]
[397 413 429 445 461 477 493 509] [397 398 399 400 401 402 403 404]
[ 14 30 46 62 78 94 110 126] [14 15 16 17 18 19 20 21]
[142 158 174 190 206 222 238 254] [142 143 144 145 146 147 148 149]
[270 286 302 318 334 350 366 382] [270 271 272 273 274 275 276 277]
[398 414 430 446 462 478 494 510] [398 399 400 401 402 403 404 405]
[ 15 31 47 63 79 95 111 127] [15 16 17 18 19 20 21 22]
[143 159 175 191 207 223 239 255] [143 144 145 146 147 148 149 150]
[271 287 303 319 335 351 367 383] [271 272 273 274 275 276 277 278]
[399 415 431 447 463 479 495 511] [399 400 401 402 403 404 405 406]
num_grid_x = 4
num_grid_y = 4
a = np.arange(4*8*num_grid_x*num_grid_y).reshape(32, num_grid_y, num_grid_x).reshape(4, 8, num_grid_y, num_grid_x)
aa = a.flatten()
b = a.transpose(2, 3, 0, 1)  # -> num_grid_y, num_grid_x, 4, 8
bb = b.flatten()
for i in range(num_grid_y):
    for j in range(num_grid_x):
        for k in range(4):
            # print(b[i,j,k], " ", a[k,:,i,j])
            t1 = i*num_grid_x*32 + j*32 + k*8
            t2 = k*8*num_grid_x*num_grid_y + i*num_grid_x + j
            print(f"{bb[t1:t1+8]} {aa[t2:t2+8*16:16]}")
[ 0 16 32 48 64 80 96 112] [ 0 16 32 48 64 80 96 112]
[128 144 160 176 192 208 224 240] [128 144 160 176 192 208 224 240]
[256 272 288 304 320 336 352 368] [256 272 288 304 320 336 352 368]
[384 400 416 432 448 464 480 496] [384 400 416 432 448 464 480 496]
[ 1 17 33 49 65 81 97 113] [ 1 17 33 49 65 81 97 113]
[129 145 161 177 193 209 225 241] [129 145 161 177 193 209 225 241]
[257 273 289 305 321 337 353 369] [257 273 289 305 321 337 353 369]
[385 401 417 433 449 465 481 497] [385 401 417 433 449 465 481 497]
[ 2 18 34 50 66 82 98 114] [ 2 18 34 50 66 82 98 114]
[130 146 162 178 194 210 226 242] [130 146 162 178 194 210 226 242]
[258 274 290 306 322 338 354 370] [258 274 290 306 322 338 354 370]
[386 402 418 434 450 466 482 498] [386 402 418 434 450 466 482 498]
[ 3 19 35 51 67 83 99 115] [ 3 19 35 51 67 83 99 115]
[131 147 163 179 195 211 227 243] [131 147 163 179 195 211 227 243]
[259 275 291 307 323 339 355 371] [259 275 291 307 323 339 355 371]
[387 403 419 435 451 467 483 499] [387 403 419 435 451 467 483 499]
[ 4 20 36 52 68 84 100 116] [ 4 20 36 52 68 84 100 116]
[132 148 164 180 196 212 228 244] [132 148 164 180 196 212 228 244]
[260 276 292 308 324 340 356 372] [260 276 292 308 324 340 356 372]
[388 404 420 436 452 468 484 500] [388 404 420 436 452 468 484 500]
[ 5 21 37 53 69 85 101 117] [ 5 21 37 53 69 85 101 117]
[133 149 165 181 197 213 229 245] [133 149 165 181 197 213 229 245]
[261 277 293 309 325 341 357 373] [261 277 293 309 325 341 357 373]
[389 405 421 437 453 469 485 501] [389 405 421 437 453 469 485 501]
[ 6 22 38 54 70 86 102 118] [ 6 22 38 54 70 86 102 118]
[134 150 166 182 198 214 230 246] [134 150 166 182 198 214 230 246]
[262 278 294 310 326 342 358 374] [262 278 294 310 326 342 358 374]
[390 406 422 438 454 470 486 502] [390 406 422 438 454 470 486 502]
[ 7 23 39 55 71 87 103 119] [ 7 23 39 55 71 87 103 119]
[135 151 167 183 199 215 231 247] [135 151 167 183 199 215 231 247]
[263 279 295 311 327 343 359 375] [263 279 295 311 327 343 359 375]
[391 407 423 439 455 471 487 503] [391 407 423 439 455 471 487 503]
[ 8 24 40 56 72 88 104 120] [ 8 24 40 56 72 88 104 120]
[136 152 168 184 200 216 232 248] [136 152 168 184 200 216 232 248]
[264 280 296 312 328 344 360 376] [264 280 296 312 328 344 360 376]
[392 408 424 440 456 472 488 504] [392 408 424 440 456 472 488 504]
[ 9 25 41 57 73 89 105 121] [ 9 25 41 57 73 89 105 121]
[137 153 169 185 201 217 233 249] [137 153 169 185 201 217 233 249]
[265 281 297 313 329 345 361 377] [265 281 297 313 329 345 361 377]
[393 409 425 441 457 473 489 505] [393 409 425 441 457 473 489 505]
[ 10 26 42 58 74 90 106 122] [ 10 26 42 58 74 90 106 122]
[138 154 170 186 202 218 234 250] [138 154 170 186 202 218 234 250]
[266 282 298 314 330 346 362 378] [266 282 298 314 330 346 362 378]
[394 410 426 442 458 474 490 506] [394 410 426 442 458 474 490 506]
[ 11 27 43 59 75 91 107 123] [ 11 27 43 59 75 91 107 123]
[139 155 171 187 203 219 235 251] [139 155 171 187 203 219 235 251]
[267 283 299 315 331 347 363 379] [267 283 299 315 331 347 363 379]
[395 411 427 443 459 475 491 507] [395 411 427 443 459 475 491 507]
[ 12 28 44 60 76 92 108 124] [ 12 28 44 60 76 92 108 124]
[140 156 172 188 204 220 236 252] [140 156 172 188 204 220 236 252]
[268 284 300 316 332 348 364 380] [268 284 300 316 332 348 364 380]
[396 412 428 444 460 476 492 508] [396 412 428 444 460 476 492 508]
[ 13 29 45 61 77 93 109 125] [ 13 29 45 61 77 93 109 125]
[141 157 173 189 205 221 237 253] [141 157 173 189 205 221 237 253]
[269 285 301 317 333 349 365 381] [269 285 301 317 333 349 365 381]
[397 413 429 445 461 477 493 509] [397 413 429 445 461 477 493 509]
[ 14 30 46 62 78 94 110 126] [ 14 30 46 62 78 94 110 126]
[142 158 174 190 206 222 238 254] [142 158 174 190 206 222 238 254]
[270 286 302 318 334 350 366 382] [270 286 302 318 334 350 366 382]
[398 414 430 446 462 478 494 510] [398 414 430 446 462 478 494 510]
[ 15 31 47 63 79 95 111 127] [ 15 31 47 63 79 95 111 127]
[143 159 175 191 207 223 239 255] [143 159 175 191 207 223 239 255]
[271 287 303 319 335 351 367 383] [271 287 303 319 335 351 367 383]
[399 415 431 447 463 479 495 511] [399 415 431 447 463 479 495 511]
So in C code it becomes:
// reg_1max = 8; tmp holds the 8 contiguous distribution values for one box side
float* tmp = new float[reg_1max];
for (int m = 0; m < reg_1max; m++)
{
tmp[m] = out_box[k * reg_1max* num_grid_y * num_grid_x + i * num_grid_x + j + m * num_grid_y * num_grid_x];
}
To understand it: in the k×k×4×8 layout, the flat offset i*num_grid_x*4*8 + j*4*8 + k*8 is the start address of a group of 8 numbers, and you simply read the next 8 consecutive values.
In the 4×8×k×k layout the Python indexing is a[k, :, i, j], where the colon takes the whole axis of length 8; the C equivalent is out_box[k*8*num_grid_y*num_grid_x + m*num_grid_y*num_grid_x + i*num_grid_x + j], where m plays the role of the colon — m = 0, 1, ..., 7 walks through the 8 values along that dimension. A standalone check of this mapping is sketched below.
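As a quick self-check of the regression-head mapping (again only a minimal standalone sketch, not from the project code; the buffers are just filled with 0..511 like the numpy examples), the following program reads the 8 distribution values of each box side from both layouts and verifies that they agree:

#include <cstdio>
#include <vector>

int main()
{
    const int kx = 4, ky = 4, reg_1max = 8;
    // full-model layout: k x k x 4 x 8 ; pruned-model layout: 4 x 8 x k x k
    std::vector<float> full(ky * kx * 4 * reg_1max), pruned(4 * reg_1max * ky * kx);
    for (int c = 0; c < 4 * reg_1max; ++c)          // c = k*8 + m in the pruned layout
        for (int i = 0; i < ky; ++i)
            for (int j = 0; j < kx; ++j) {
                float v = (float)(c * ky * kx + i * kx + j);  // values 0..511
                pruned[c * ky * kx + i * kx + j] = v;         // (k, m, i, j)
                full[(i * kx + j) * 4 * reg_1max + c] = v;    // (i, j, k, m)
            }
    // compare the 8 distribution values of side k at cell (i, j) read from both layouts
    int mismatches = 0;
    for (int i = 0; i < ky; ++i)
        for (int j = 0; j < kx; ++j)
            for (int k = 0; k < 4; ++k)
                for (int m = 0; m < reg_1max; ++m) {
                    float a = full[i * kx * 4 * reg_1max + j * 4 * reg_1max + k * reg_1max + m];
                    float b = pruned[k * reg_1max * ky * kx + m * ky * kx + i * kx + j];
                    if (a != b) ++mismatches;
                }
    printf("mismatches: %d\n", mismatches); // expected: 0
    return 0;
}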
3. An improved softmax
I switched to a different softmax implementation (a fast approximate exp plus max-subtraction for numerical stability). The full program for the pruned model follows:
#define _CRT_SECURE_NO_WARNINGS
#include <iostream>
#include <fstream>
#include <string>
#include <cmath>
#include <vector>
#include <array>
#include <algorithm>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
//#include <cuda_provider_factory.h>
#include <onnxruntime_cxx_api.h>
//using namespace cv;
//using namespace std;
//using namespace Ort;
typedef struct BoxInfo
{
float x1;
float y1;
float x2;
float y2;
float score;
int label;
} BoxInfo;
class PicoDet
{
public:
PicoDet(std::string model_path, std::string classesFile, float nms_threshold, float objThreshold);
void detect(cv::Mat& cv_image);
private:
float score_threshold = 0.5;
float nms_threshold = 0.5;
std::vector<std::string> class_names;
int num_class;
cv::Mat resize_image(cv::Mat srcimg, int* newh, int* neww, int* top, int* left);
std::vector<float> input_image_;
void normalize_(cv::Mat img);
inline float fast_exp(float x);
template <typename _Tp>
int activation_function_softmax(const _Tp* src, _Tp* dst, int length);
//void softmax_(const float* x, float* y, int length);
void generate_proposal(std::vector<BoxInfo>& generate_boxes, const int stride_, const float* out_score, const float* out_box);
void nms(std::vector<BoxInfo>& input_boxes);
const bool keep_ratio = false;
int inpWidth;
int inpHeight;
int num_outs;
int reg_max;
std::vector<int> stride;
//const float mean[3] = { 103.53, 116.28, 123.675 };
//const float stds[3] = { 57.375, 57.12, 58.395 };
const float mean[3] = { 0.0, 0.0, 0.0 };
const float stds[3] = { 255.0, 255.0, 255.0 };
Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "picodet");
Ort::Session* ort_session = nullptr;
Ort::SessionOptions sessionOptions = Ort::SessionOptions();
std::vector<char*> input_names;
std::vector<char*> output_names;
std::vector<std::vector<int64_t>> input_node_dims; // >=1 inputs
std::vector<std::vector<int64_t>> output_node_dims; // >=1 outputs
};
inline float PicoDet::fast_exp(float x) {
union {
uint32_t i;
float f;
} v{};
v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f);
return v.f;
}
template <typename _Tp>
int PicoDet::activation_function_softmax(const _Tp* src, _Tp* dst, int length) {
const _Tp alpha = *std::max_element(src, src + length);
_Tp denominator{ 0 };
for (int i = 0; i < length; ++i) {
dst[i] = fast_exp(src[i] - alpha);
denominator += dst[i];
}
for (int i = 0; i < length; ++i) {
dst[i] /= denominator;
}
return 0;
}
PicoDet::PicoDet(std::string model_path, std::string classesFile, float nms_threshold, float objThreshold)
{
std::ifstream ifs(classesFile.c_str());
std::string line;
while (std::getline(ifs, line)) this->class_names.push_back(line);
this->num_class = class_names.size();
this->nms_threshold = nms_threshold;
this->score_threshold = objThreshold;
std::wstring widestr = std::wstring(model_path.begin(), model_path.end());
//OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0);
sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
ort_session = new Ort::Session(env, widestr.c_str(), sessionOptions);
size_t numInputNodes = ort_session->GetInputCount();
size_t numOutputNodes = ort_session->GetOutputCount();
Ort::AllocatorWithDefaultOptions allocator;
for (int i = 0; i < numInputNodes; i++)
{
input_names.push_back(ort_session->GetInputName(i, allocator));
Ort::TypeInfo input_type_info = ort_session->GetInputTypeInfo(i);
auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
auto input_dims = input_tensor_info.GetShape();
input_node_dims.push_back(input_dims);
}
for (int i = 0; i < numOutputNodes; i++)
{
output_names.push_back(ort_session->GetOutputName(i, allocator));
Ort::TypeInfo output_type_info = ort_session->GetOutputTypeInfo(i);
auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();
auto output_dims = output_tensor_info.GetShape();
output_node_dims.push_back(output_dims);
/*for (int j = 0; j < output_dims.size(); j++)
{
cout << output_dims[j] << ",";
}
cout << endl;*/
}
this->inpHeight = input_node_dims[0][2];
this->inpWidth = input_node_dims[0][3];
this->num_outs = int(numOutputNodes * 0.5);
this->reg_max = output_node_dims[this->num_outs][output_node_dims[this->num_outs].size() - 1] / 4 - 1;
for (int i = 0; i < this->num_outs; i++)
{
stride.push_back(int(8 * pow(2, i)));
}
}
cv::Mat PicoDet::resize_image(cv::Mat srcimg, int* newh, int* neww, int* top, int* left)
{
int srch = srcimg.rows, srcw = srcimg.cols;
*newh = this->inpHeight;
*neww = this->inpWidth;
cv::Mat dstimg;
if (this->keep_ratio && srch != srcw) {
float hw_scale = (float)srch / srcw;
if (hw_scale > 1) {
*newh = this->inpHeight;
*neww = int(this->inpWidth / hw_scale);
cv::resize(srcimg, dstimg, cv::Size(*neww, *newh), 0, 0, cv::INTER_AREA);
*left = int((this->inpWidth - *neww) * 0.5);
copyMakeBorder(dstimg, dstimg, 0, 0, *left, this->inpWidth - *neww - *left, cv::BORDER_CONSTANT, 0);
}
else {
*newh = (int)this->inpHeight * hw_scale;
*neww = this->inpWidth;
cv::resize(srcimg, dstimg, cv::Size(*neww, *newh), 0, 0, cv::INTER_AREA);
*top = (int)(this->inpHeight - *newh) * 0.5;
copyMakeBorder(dstimg, dstimg, *top, this->inpHeight - *newh - *top, 0, 0, cv::BORDER_CONSTANT, 0);
}
}
else {
cv::resize(srcimg, dstimg, cv::Size(*neww, *newh), 0, 0, cv::INTER_AREA);
}
return dstimg;
}
void PicoDet::normalize_(cv::Mat img)
{
// img.convertTo(img, CV_32F);
int row = img.rows;
int col = img.cols;
this->input_image_.resize(row * col * img.channels());
for (int c = 0; c < 3; c++)
{
for (int i = 0; i < row; i++)
{
for (int j = 0; j < col; j++)
{
float pix = img.ptr<uchar>(i)[j * 3 + c];
this->input_image_[c * row * col + i * col + j] = (pix / 255.0 - mean[c] / 255.0) / (stds[c] / 255.0);
//this->input_image_[c * row * col + i * col + j] = (pix - mean[c]) / stds[c];
}
}
}
}
/*
void PicoDet::softmax_(const float* x, float* y, int length)
{
float sum = 0;
int i = 0;
for (i = 0; i < length; i++)
{
y[i] = exp(x[i]);
sum += y[i];
}
for (i = 0; i < length; i++)
{
y[i] /= sum;
}
}
*/
void PicoDet::generate_proposal(std::vector<BoxInfo>& generate_boxes, const int stride_, const float* out_score, const float* out_box)
{
const int num_grid_y = (int)ceil((float)this->inpHeight / stride_);
const int num_grid_x = (int)ceil((float)this->inpWidth / stride_);
std::cout << "num_grid_x=" << num_grid_x << ",num_grid_y=" << num_grid_y << std::endl;
const int reg_1max = reg_max + 1;
//std::cout << "score:" << std::endl;
for (int i = 0; i < num_grid_y; i++)
{
for (int j = 0; j < num_grid_x; j++)
{
int max_ind = 0;
float max_score = 0;
for (int k = 0; k < num_class; k++)
{
/* this indexing is for the original (un-pruned) model output */
//float score = out_score[i * num_grid_x * num_class + j * num_class + k];
/* the line below is the pruned-model version (reshape and transpose removed) implemented with plain C indexing; use one of the two lines — index (i,j,k) maps to (k,i,j) */
float score = std::sqrt(out_score[k*num_grid_y*num_grid_x+i*num_grid_x+j]);
//std::cout <<score << " ";
if (score > max_score)
{
max_score = score;
max_ind = k;
}
}
if (max_score >= score_threshold)
{
std::cout << "box:" << std::endl;
//const float* pbox = out_box + idx * reg_1max * 4;
float dis_pred[4];
float* y = new float[reg_1max];
for (int k = 0; k < 4; k++)
{
/* original model */
//const float* tmp = out_box + i * num_grid_x * reg_1max * 4 + j * reg_1max * 4 + k * reg_1max;
//std::cout << "r:" << *tmp << std::endl;
/* using the version without reshape/transpose */
float* tmp = new float[reg_1max];
for (int m = 0; m < reg_1max; m++)
{
tmp[m] = out_box[k * num_grid_y * num_grid_x * reg_1max + i * num_grid_x + j + m * num_grid_y * num_grid_x];
}
//std::cout << "r:" << *tmp << std::endl;
//softmax_(tmp, y, reg_1max);
activation_function_softmax(tmp, y, reg_1max);
delete[] tmp; // release the per-side buffer
float dis = 0.f;
for (int l = 0; l < reg_1max; l++)
{
dis += l * y[l];
}
dis_pred[k] = dis * stride_;
}
delete[] y;
float pb_cx = (j + 0.5f) * stride_ - 0.5;
float pb_cy = (i + 0.5f) * stride_ - 0.5;
float x0 = pb_cx - dis_pred[0];
float y0 = pb_cy - dis_pred[1];
float x1 = pb_cx + dis_pred[2];
float y1 = pb_cy + dis_pred[3];
generate_boxes.push_back(BoxInfo{ x0, y0, x1, y1, max_score, max_ind });
}
}
}
}
void PicoDet::nms(std::vector<BoxInfo>& input_boxes)
{
sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; });
std::vector<float> vArea(input_boxes.size());
for (int i = 0; i < int(input_boxes.size()); ++i)
{
vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)
* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
}
std::vector<bool> isSuppressed(input_boxes.size(), false);
for (int i = 0; i < int(input_boxes.size()); ++i)
{
if (isSuppressed[i]) { continue; }
for (int j = i + 1; j < int(input_boxes.size()); ++j)
{
if (isSuppressed[j]) { continue; }
float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1);
float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1);
float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2);
float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2);
float w = (std::max)(float(0), xx2 - xx1 + 1);
float h = (std::max)(float(0), yy2 - yy1 + 1);
float inter = w * h;
float ovr = inter / (vArea[i] + vArea[j] - inter);
if (ovr >= this->nms_threshold)
{
isSuppressed[j] = true;
}
}
}
// return post_nms;
int idx_t = 0;
input_boxes.erase(remove_if(input_boxes.begin(), input_boxes.end(), [&idx_t, &isSuppressed](const BoxInfo& f) { return isSuppressed[idx_t++]; }), input_boxes.end());
}
void PicoDet::detect(cv::Mat& srcimg)
{
int newh = 0, neww = 0, top = 0, left = 0;
cv::Mat cv_image = srcimg.clone();
cv::Mat dst = this->resize_image(cv_image, &newh, &neww, &top, &left);
this->normalize_(dst);
std::array<int64_t, 4> input_shape_{ 1, 3, this->inpHeight, this->inpWidth };
auto allocator_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
Ort::Value input_tensor_ = Ort::Value::CreateTensor<float>(allocator_info, input_image_.data(), input_image_.size(), input_shape_.data(), input_shape_.size());
std::vector<Ort::Value> ort_outputs = ort_session->Run(Ort::RunOptions{ nullptr }, &input_names[0], &input_tensor_, 1, output_names.data(), output_names.size()); // run inference
// generate proposals
std::vector<BoxInfo> generate_boxes;
for (int i = 0; i < this->num_outs; i++)
{
//auto cls_shape = this->output_node_dims[i];
const float* cls_score = ort_outputs[i].GetTensorMutableData<float>();
//std::vector<int64_t> new_cls_shape = { cls_shape[0],cls_shape[1],cls_shape[2] * cls_shape[3] };
const float* bbox_pred = ort_outputs[i + this->num_outs].GetTensorMutableData<float>();
//auto reg_shape = this->output_node_dims[i+this->num_outs];
generate_proposal(generate_boxes, stride[i], cls_score, bbox_pred);
}
// Perform non-maximum suppression to eliminate redundant overlapping boxes with lower confidences
nms(generate_boxes);
float ratioh = (float)cv_image.rows / newh;
float ratiow = (float)cv_image.cols / neww;
for (size_t i = 0; i < generate_boxes.size(); ++i)
{
int xmin = (int)std::max((generate_boxes[i].x1 - left) * ratiow, 0.f);
int ymin = (int)std::max((generate_boxes[i].y1 - top) * ratioh, 0.f);
int xmax = (int)std::min((generate_boxes[i].x2 - left) * ratiow, (float)cv_image.cols);
int ymax = (int)std::min((generate_boxes[i].y2 - top) * ratioh, (float)cv_image.rows);
rectangle(srcimg, cv::Point(xmin, ymin), cv::Point(xmax, ymax), cv::Scalar(0, 0, 255), 2);
std::string label = cv::format("%.2f", generate_boxes[i].score);
label = this->class_names[generate_boxes[i].label] + ":" + label;
putText(srcimg, label, cv::Point(xmin, ymin - 5), cv::FONT_HERSHEY_SIMPLEX, 0.75, cv::Scalar(0, 255, 0), 1);
}
}
int main()
{
//PicoDet mynet("C:/Users/tl/Desktop/demo_ncnn/ncnn_our/picodet_xs_320_voc_256_20230405_shape.onnx", "C:/Users/tl/Desktop/demo_ncnn/ncnn_our/ball.names", 0.5, 0.5); /// choice = ["picodet_m_320_coco.onnx", "picodet_m_416_coco.onnx", "picodet_s_320_coco.onnx", "picodet_s_416_coco.onnx"]
PicoDet mynet("C:/Users/tl/Desktop/demo_ncnn/ncnn_our/picodet_xs_320_voc_256_20230405_shape_sim_prune.onnx", "C:/Users/tl/Desktop/demo_ncnn/ncnn_our/ball.names", 0.5, 0.5);
std::string imgpath = "C:/Users/tl/Desktop/demo_ncnn/ncnn_our/test.jpg";
cv::Mat srcimg = cv::imread(imgpath);
mynet.detect(srcimg);
cv::imwrite("C:/Users/tl/Desktop/demo_ncnn/ncnn_our/test_result.jpg", srcimg);
static const std::string kWinName = "Deep learning object detection in ONNXRuntime";
cv::namedWindow(kWinName, cv::WINDOW_NORMAL);
cv::imshow(kWinName, srcimg);
cv::waitKey(0);
cv::destroyAllWindows();
}
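One thing worth checking after swapping in fast_exp is how much accuracy the bit-trick approximation gives up. The small standalone sketch below (not part of the project code; it only reuses the same constants as PicoDet::fast_exp above) measures the maximum relative error against std::exp over the range that matters after max-subtraction:

#include <cstdio>
#include <cmath>
#include <cstdint>

// same bit-trick approximation as PicoDet::fast_exp above
static inline float fast_exp(float x) {
    union { uint32_t i; float f; } v{};
    v.i = (uint32_t)((1 << 23) * (1.4426950409f * x + 126.93490512f));
    return v.f;
}

int main() {
    float max_rel_err = 0.f;
    // the softmax inputs are max-subtracted, so the relevant range is roughly [-20, 0]
    for (float x = -20.f; x <= 0.f; x += 0.01f) {
        float a = fast_exp(x), b = std::exp(x);
        float rel = std::fabs(a - b) / b;
        if (rel > max_rel_err) max_rel_err = rel;
    }
    printf("max relative error of fast_exp on [-20, 0]: %f\n", max_rel_err);
    return 0;
}

If the reported error is acceptable for your model (typically on the order of a few percent), the approximation is safe for this post-processing step; otherwise fall back to the plain exp-based softmax shown earlier.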