C++ opencv 实现视觉算法中的前处理
C++ opencv 实现视觉算法中的前处理
·
C++ opencv 实现视觉算法中的前处理
实现1: 直接缩放处理
/**
 * Plain-resize preprocessing: stretch `image` to `size` (no letterbox padding,
 * aspect ratio is not preserved), convert it to a float NCHW blob and enqueue
 * an upload of that blob to the first device buffer.
 *
 * The blob is produced by cv::dnn::blobFromImage with scale factor 1/255,
 * zero mean, swapRB = false and crop = false, i.e. every value is
 * `pixel / 255` and the channel order of `image` is kept as-is.
 * No mean/std normalization is applied.
 *
 * Side effects:
 *  - stores the original image size in `this->imgS`;
 *  - issues cudaMemcpyAsync (host -> device) into `this->device_ptrs[0]`
 *    on `this->stream`.
 *
 * @param image  Source image on the host.
 * @param size   Target width/height handed to cv::resize.
 */
void YOLOv8_pose::copy_from_Mat(const cv::Mat& image, cv::Size& size)
{
    // Keep the pre-resize dimensions on the object.
    this->imgS = image.size();

    // Direct resize to the requested size.
    cv::Mat resized;
    cv::resize(image, resized, size);

    // HWC uint8 -> NCHW float32, scaled by 1/255. An empty cv::Size() means
    // blobFromImage performs no additional resize.
    cv::Mat nchw;
    cv::dnn::blobFromImage(resized, nchw, 1. / 255., cv::Size(), cv::Scalar(0, 0, 0), false, false, CV_32F);

    // NOTE(review): `nchw` is a local (pageable) host buffer that goes out of
    // scope when this function returns — confirm the async copy semantics are
    // acceptable for the stream usage in the rest of the class.
    // CHECK is defined elsewhere; presumably it reports CUDA errors.
    CHECK(cudaMemcpyAsync(
        this->device_ptrs[0], nchw.ptr<float>(), nchw.total() * nchw.elemSize(), cudaMemcpyHostToDevice, this->stream));
}
实现2: 传统Letterbox + OpenCV函数链
#include <opencv2/opencv.hpp>
#include <vector>
/**
 * Letterbox preprocessing implemented as a chain of OpenCV calls.
 *
 * Pipeline: aspect-preserving resize (INTER_LINEAR) -> centered paste onto a
 * canvas filled with 114 -> BGR to RGB -> convert to float scaled by 1/255 ->
 * per-channel `(x - mean[c]) / std[c]` -> pack the three planes back to back
 * (CHW layout).
 *
 * Pros: short code that relies on OpenCV's routines.
 * Cons: several passes over the image and several intermediate buffers.
 *
 * @param src_img   Source image. COLOR_BGR2RGB is applied to it, so a
 *                  3-channel BGR image is expected.
 * @param target_w  Output width in pixels.
 * @param target_h  Output height in pixels.
 * @param mean      Three per-channel means, indexed in post-conversion (RGB) order.
 * @param std       Three per-channel divisors, indexed in post-conversion (RGB) order.
 * @return Vector of `3 * target_h * target_w` floats, one full plane per channel.
 */
std::vector<float> preprocess_with_opencv(const cv::Mat& src_img,
                                          int target_w, int target_h,
                                          const float* mean, const float* std) {
    // Uniform scale that makes the image fit inside the target rectangle.
    const float ratio = std::min(static_cast<float>(target_w) / src_img.cols,
                                 static_cast<float>(target_h) / src_img.rows);
    const int scaled_w = static_cast<int>(src_img.cols * ratio);
    const int scaled_h = static_cast<int>(src_img.rows * ratio);

    // Region of the canvas that receives the resized image (centered).
    const cv::Rect roi((target_w - scaled_w) / 2, (target_h - scaled_h) / 2,
                       scaled_w, scaled_h);

    cv::Mat scaled;
    cv::resize(src_img, scaled, roi.size(), 0, 0, cv::INTER_LINEAR);

    // Canvas pre-filled with the padding value 114, then the image is pasted in.
    cv::Mat canvas(target_h, target_w, src_img.type(), cv::Scalar(114, 114, 114));
    scaled.copyTo(canvas(roi));

    // BGR -> RGB, then uint8 -> float32 with a 1/255 scale.
    cv::Mat rgb_u8;
    cv::cvtColor(canvas, rgb_u8, cv::COLOR_BGR2RGB);
    cv::Mat rgb_f32;
    rgb_u8.convertTo(rgb_f32, CV_32FC3, 1.0 / 255.0);

    // Split into planes, normalize each one and copy it into its CHW slot.
    std::vector<cv::Mat> planes(3);
    cv::split(rgb_f32, planes);

    const int plane_elems = target_h * target_w;
    std::vector<float> tensor(3 * plane_elems);
    float* out = tensor.data();
    for (int c = 0; c < 3; ++c, out += plane_elems) {
        planes[c] = (planes[c] - mean[c]) / std[c];
        std::memcpy(out, planes[c].data, plane_elems * sizeof(float));
    }
    return tensor;
}
实现3: 单次遍历融合版(性能最优)
/**
 * Fused letterbox preprocessing: resize, pad, BGR->RGB, scale to [0, 1] and
 * mean/std normalization are all done in a single pass over the destination
 * tensor, without intermediate images.
 *
 * Pros: fewest memory passes. Cons: more involved code.
 *
 * Every output value, padding included, is `(v / 255 - mean[c]) / std[c]`,
 * where `v` is the bilinearly interpolated source value or 114 for padding.
 *
 * Sampling note: destination pixel (x, y) inside the image area maps to source
 * coordinate `(x - offset_x) / scale` (top-left aligned, no half-pixel shift),
 * so results are not expected to be pixel-identical to cv::resize.
 *
 * @param src_img   Source image. Rows are read as 3 interleaved bytes per pixel
 *                  and channel `2 - c` is taken for output channel `c`, i.e. an
 *                  8-bit 3-channel BGR image is expected.
 * @param target_w  Output width in pixels.
 * @param target_h  Output height in pixels.
 * @param mean      Three per-channel means in RGB order.
 * @param std       Three per-channel divisors in RGB order.
 * @return Vector of `3 * target_h * target_w` floats in CHW layout (R, G, B planes).
 */
std::vector<float> preprocess_single_pass(const cv::Mat& src_img,
                                          int target_w, int target_h,
                                          const float* mean, const float* std) {
    const int plane = target_h * target_w;  // elements per channel plane
    std::vector<float> tensor(3 * plane);

    // Letterbox geometry: uniform scale plus centering offsets.
    const float scale = std::min(static_cast<float>(target_w) / src_img.cols,
                                 static_cast<float>(target_h) / src_img.rows);
    const int new_w = static_cast<int>(src_img.cols * scale);
    const int new_h = static_cast<int>(src_img.rows * scale);
    const int offset_x = (target_w - new_w) / 2;
    const int offset_y = (target_h - new_h) / 2;
    const float inv_scale = 1.0f / scale;

    // Normalized padding value per output channel (RGB order). The gray level
    // 114 must be scaled by 1/255 exactly like real pixels before the mean/std
    // step; otherwise the border ends up far outside the image value range.
    const float pad_level = 114.0f / 255.0f;
    const float bg[3] = {
        (pad_level - mean[0]) / std[0],
        (pad_level - mean[1]) / std[1],
        (pad_level - mean[2]) / std[2],
    };

    // Horizontal span of the destination row that is covered by image data.
    const int x_begin = std::max(offset_x, 0);
    const int x_end = std::min(offset_x + new_w, target_w);

    for (int dst_y = 0; dst_y < target_h; ++dst_y) {
        float* row_ptr = tensor.data() + dst_y * target_w;

        // Rows above/below the image area are pure padding.
        if (dst_y < offset_y || dst_y >= offset_y + new_h) {
            for (int c = 0; c < 3; ++c) {
                std::fill_n(row_ptr + c * plane, target_w, bg[c]);
            }
            continue;
        }

        // Left and right padding of a row that intersects the image area.
        for (int c = 0; c < 3; ++c) {
            std::fill_n(row_ptr + c * plane, x_begin, bg[c]);
            std::fill_n(row_ptr + c * plane + x_end, target_w - x_end, bg[c]);
        }

        // Vertical source coordinate and interpolation weights for this row.
        const float src_y_f = (dst_y - offset_y) * inv_scale;
        const int y0 = std::max(0, std::min(static_cast<int>(src_y_f), src_img.rows - 1));
        const int y1 = std::min(y0 + 1, src_img.rows - 1);
        const float dy = src_y_f - y0;
        const float dy1 = 1.0f - dy;

        // The two source rows are the same for every pixel of this output row.
        const uchar* src_row0 = src_img.ptr<uchar>(y0);
        const uchar* src_row1 = src_img.ptr<uchar>(y1);

        for (int dst_x = x_begin; dst_x < x_end; ++dst_x) {
            // Horizontal source coordinate and interpolation weights.
            const float src_x_f = (dst_x - offset_x) * inv_scale;
            const int x0 = std::max(0, std::min(static_cast<int>(src_x_f), src_img.cols - 1));
            const int x1 = std::min(x0 + 1, src_img.cols - 1);
            const float dx = src_x_f - x0;
            const float dx1 = 1.0f - dx;

            // Four neighbouring source pixels (interleaved BGR bytes).
            const uchar* p00 = src_row0 + x0 * 3;
            const uchar* p10 = src_row0 + x1 * 3;
            const uchar* p01 = src_row1 + x0 * 3;
            const uchar* p11 = src_row1 + x1 * 3;

            // Bilinear interpolation + BGR->RGB + normalization, fused.
            for (int c = 0; c < 3; ++c) {
                const int src_c = 2 - c;  // output channel c (RGB) reads BGR index 2 - c
                const float v00 = p00[src_c];
                const float v10 = p10[src_c];
                const float v01 = p01[src_c];
                const float v11 = p11[src_c];
                const float val = (v00 * dx1 + v10 * dx) * dy1 + (v01 * dx1 + v11 * dx) * dy;
                row_ptr[dst_x + c * plane] = (val / 255.0f - mean[c]) / std[c];
            }
        }
    }
    return tensor;
}
实现4: OpenMP 多线程并行版(支持灰度图和 BGR 图)
#include <opencv2/opencv.hpp>
#include <vector>
#include <omp.h> // 需要开启 OpenMP 选项
/**
 * @brief CPU letterbox preprocessing (gray or 3-channel input -> planar CHW
 *        float tensor), parallelized with OpenMP.
 *
 * The image is scaled uniformly to fit a `target_size` x `target_size` square,
 * centered, bilinearly interpolated (pixel-center aligned sampling) and
 * divided by 255. Everything outside the image area is left at 0.0f.
 * Channel order is not changed: plane `c` of the output holds source channel
 * `c` (for a BGR input the planes are B, G, R).
 *
 * @param src          Source image (CV_8UC1 or CV_8UC3).
 * @param dst_data     Destination buffer, already allocated with
 *                     `target_size * target_size * channels` floats.
 * @param target_size  Side length of the square output (e.g. 640).
 */
void cpu_preprocess_optimized(const cv::Mat& src, float* dst_data, int target_size) {
    const int in_w = src.cols;
    const int in_h = src.rows;
    const int channels = src.channels();  // 1 or 3 expected

    // Letterbox geometry: uniform scale, rounded output size, centered padding.
    const float ratio = std::min(static_cast<float>(target_size) / in_w,
                                 static_cast<float>(target_size) / in_h);
    const int out_w = static_cast<int>(round(in_w * ratio));
    const int out_h = static_cast<int>(round(in_h * ratio));
    const int pad_x = (target_size - out_w) / 2;
    const int pad_y = (target_size - out_h) / 2;

    // Clear the whole tensor so the padding area is 0.0f.
    const int plane_area = target_size * target_size;
    std::fill_n(dst_data, plane_area * channels, 0.0f);

    const float inv_ratio = 1.0f / ratio;
    // A single-channel image writes one plane; anything else writes three.
    const int planes = (channels == 1) ? 1 : 3;

    // Every destination pixel is independent, so both loops are collapsed into
    // one parallel iteration space.
#pragma omp parallel for collapse(2)
    for (int row = 0; row < out_h; ++row) {
        for (int col = 0; col < out_w; ++col) {
            // Map the destination pixel center back into source coordinates.
            const float fx = (col + 0.5f) * inv_ratio - 0.5f;
            const float fy = (row + 0.5f) * inv_ratio - 0.5f;

            const int fx_floor = static_cast<int>(std::floor(fx));
            const int fy_floor = static_cast<int>(std::floor(fy));

            // Neighbour indices: the upper neighbour is derived from the
            // unclamped floor, the lower one is clamped to 0 afterwards.
            const int right = std::min(fx_floor + 1, in_w - 1);
            const int bottom = std::min(fy_floor + 1, in_h - 1);
            const int left = std::max(fx_floor, 0);
            const int top = std::max(fy_floor, 0);

            // Interpolation weights relative to the clamped top-left sample.
            const float wy1 = fy - top;
            const float wx1 = fx - left;
            const float wy0 = 1.0f - wy1;
            const float wx0 = 1.0f - wx1;

            // Pointers to the four neighbouring pixels (interleaved channels).
            const uint8_t* tl = src.ptr<uint8_t>(top, left);
            const uint8_t* tr = src.ptr<uint8_t>(top, right);
            const uint8_t* bl = src.ptr<uint8_t>(bottom, left);
            const uint8_t* br = src.ptr<uint8_t>(bottom, right);

            // Position of this pixel inside plane 0 of the output.
            float* out = dst_data + ((row + pad_y) * target_size + (col + pad_x));

            // HWC -> CHW: channel c goes to plane c, in source channel order.
            // To emit RGB from a BGR input, the plane index could be mirrored
            // as (2 - c) instead.
            for (int c = 0; c < planes; ++c) {
                const float value = (tl[c] * wx0 + tr[c] * wx1) * wy0 + (bl[c] * wx0 + br[c] * wx1) * wy1;
                out[c * plane_area] = value / 255.0f;
            }
        }
    }
}
更多推荐
所有评论(0)