C++ OpenCV 实现视觉算法中的前处理

实现1: 直接缩放处理

/**
 * @brief Resize an image, pack it into a float NCHW blob scaled by 1/255 and
 *        enqueue its upload into the first device buffer.
 *
 * The size of the incoming image is stored in `imgS` before anything else.
 * Only the 1/255 scaling is applied: the blob is built with a zero mean,
 * `swapRB = false` and `crop = false`, so the channel order of `image` is
 * kept and no mean/std standardisation takes place.
 *
 * @param image Source image handed to cv::resize.
 * @param size  Target size handed to cv::resize (stretching resize, the
 *              aspect ratio is not preserved).
 */
void YOLOv8_pose::copy_from_Mat(const cv::Mat& image, cv::Size& size)
{
    this->imgS = image.size();

    cv::Mat resized;
    cv::resize(image, resized, size);

    // An empty cv::Size() tells blobFromImage to keep the size of `resized`.
    cv::Mat nchw;
    cv::dnn::blobFromImage(resized, nchw, 1. / 255., cv::Size(), cv::Scalar(0, 0, 0), false, false, CV_32F);

    // NOTE(review): `nchw` is a local that is released on return while the
    // copy is only enqueued on `stream`. This presumably relies on the copy
    // from pageable host memory being staged before the call returns --
    // confirm, or keep the host buffer alive until the stream is synchronised.
    CHECK(cudaMemcpyAsync(
        this->device_ptrs[0], nchw.ptr<float>(), nchw.total() * nchw.elemSize(), cudaMemcpyHostToDevice, this->stream));
}

实现2: 传统Letterbox + OpenCV函数链

#include <opencv2/opencv.hpp>
#include <vector>

/**
 * @brief Letterbox preprocessing built from a chain of OpenCV calls.
 *
 * Pipeline: aspect-preserving bilinear resize -> paste centred on a canvas
 * filled with 114 -> swap channels 0 and 2 (BGR to RGB) -> scale by 1/255 ->
 * per-channel (x - mean) / std -> planar CHW float buffer.
 *
 * Pros: short, and every step runs through OpenCV's own kernels.
 * Cons: several passes over the image and several intermediate buffers.
 *
 * @param src_img  Source image, interpreted as BGR; must not be empty.
 * @param target_w Output width in pixels, must be positive.
 * @param target_h Output height in pixels, must be positive.
 * @param mean     Three per-channel means, indexed in output (RGB) order and
 *                 expressed on the 0..1 scale.
 * @param std      Three per-channel standard deviations, same indexing.
 * @return 3 * target_h * target_w floats: plane 0, then plane 1, then plane 2.
 * @throws cv::Exception when a precondition above is violated or an OpenCV
 *         call rejects the input.
 */
std::vector<float> preprocess_with_opencv(const cv::Mat& src_img, 
                                           int target_w, int target_h,
                                           const float* mean, const float* std) {
    CV_Assert(!src_img.empty());
    CV_Assert(target_w > 0 && target_h > 0);
    CV_Assert(mean != nullptr && std != nullptr);

    // 1. Letterbox geometry: one scale factor for both axes.
    const float scale = std::min(static_cast<float>(target_w) / src_img.cols,
                                 static_cast<float>(target_h) / src_img.rows);

    // Truncation yields 0 for extreme aspect ratios (e.g. a 1-pixel-high
    // strip) and cv::resize rejects an empty size, so keep at least one pixel.
    const int new_w = std::max(1, std::min(target_w, static_cast<int>(src_img.cols * scale)));
    const int new_h = std::max(1, std::min(target_h, static_cast<int>(src_img.rows * scale)));
    const int dx = (target_w - new_w) / 2;
    const int dy = (target_h - new_h) / 2;

    // 2. Resize to the letterboxed content size.
    cv::Mat resized;
    cv::resize(src_img, resized, cv::Size(new_w, new_h), 0, 0, cv::INTER_LINEAR);

    // 3. Paste onto a canvas pre-filled with the padding value 114.
    cv::Mat letterboxed(target_h, target_w, src_img.type(), cv::Scalar(114, 114, 114));
    resized.copyTo(letterboxed(cv::Rect(dx, dy, new_w, new_h)));

    // 4. BGR -> RGB.
    cv::Mat rgb;
    cv::cvtColor(letterboxed, rgb, cv::COLOR_BGR2RGB);

    // 5. Convert to float and scale by 1/255.
    cv::Mat float_img;
    rgb.convertTo(float_img, CV_32FC3, 1.0 / 255.0);

    // 6. Split into planes, standardise each one and append it to the tensor.
    std::vector<cv::Mat> channels;
    cv::split(float_img, channels);

    // size_t arithmetic: 3 * h * w overflows int for very large targets.
    const std::size_t plane = static_cast<std::size_t>(target_h) * static_cast<std::size_t>(target_w);
    std::vector<float> tensor(3 * plane);
    for (int c = 0; c < 3; ++c) {
        // Evaluating the expression into its own matrix gives a freshly
        // allocated buffer, which the flat memcpy below depends on.
        const cv::Mat normalized = (channels[c] - mean[c]) / std[c];
        CV_Assert(normalized.isContinuous() && normalized.total() == plane);
        std::memcpy(tensor.data() + c * plane, normalized.ptr<float>(), plane * sizeof(float));
    }

    return tensor;
}

实现3: 单次遍历融合版（性能最优）

/**
 * @brief Letterbox preprocessing fused into a single pass over the output.
 *
 * For every output pixel the function either writes the padding value or
 * bilinearly samples the source, swaps BGR to RGB, scales by 1/255 and applies
 * (x - mean) / std, writing straight into the planar CHW buffer. No
 * intermediate image is created.
 *
 * Sampling uses pixel-centre alignment and per-axis source/destination ratios,
 * i.e. the same coordinate mapping as cv::resize with INTER_LINEAR. Values are
 * interpolated in float without an intermediate 8-bit rounding step, so the
 * result is close to, but not bit-identical with, a cv::resize based pipeline.
 *
 * Pros: fewest memory passes, no temporaries.
 * Cons: more code than the OpenCV call chain.
 *
 * @param src_img  Source image; must be a non-empty CV_8UC3 matrix, read as BGR.
 * @param target_w Output width in pixels, must be positive.
 * @param target_h Output height in pixels, must be positive.
 * @param mean     Three per-channel means in RGB order, on the 0..1 scale.
 * @param std      Three per-channel standard deviations in RGB order.
 * @return 3 * target_h * target_w floats: R plane, G plane, B plane.
 * @throws cv::Exception when a precondition above is violated.
 */
std::vector<float> preprocess_single_pass(const cv::Mat& src_img,
                                           int target_w, int target_h,
                                           const float* mean, const float* std) {
    // The inner loop walks raw bytes with a stride of 3, so anything other
    // than 8-bit 3-channel data would be misread or read out of bounds.
    CV_Assert(!src_img.empty() && src_img.type() == CV_8UC3);
    CV_Assert(target_w > 0 && target_h > 0);
    CV_Assert(mean != nullptr && std != nullptr);

    const std::size_t plane = static_cast<std::size_t>(target_h) * static_cast<std::size_t>(target_w);
    std::vector<float> tensor(3 * plane);

    // Letterbox geometry: one scale factor for both axes, content centred.
    const float scale = std::min(static_cast<float>(target_w) / src_img.cols,
                                 static_cast<float>(target_h) / src_img.rows);
    const int new_w = std::max(1, std::min(target_w, static_cast<int>(src_img.cols * scale)));
    const int new_h = std::max(1, std::min(target_h, static_cast<int>(src_img.rows * scale)));
    const int offset_x = (target_w - new_w) / 2;
    const int offset_y = (target_h - new_h) / 2;

    // Source pixels covered by one destination pixel, per axis.
    const float step_x = static_cast<float>(src_img.cols) / static_cast<float>(new_w);
    const float step_y = static_cast<float>(src_img.rows) / static_cast<float>(new_h);

    // Padding value after the same transform every real pixel goes through:
    // 114 is an 8-bit intensity, so it is scaled by 1/255 before mean/std.
    const float bg[3] = {
        (114.0f / 255.0f - mean[0]) / std[0],
        (114.0f / 255.0f - mean[1]) / std[1],
        (114.0f / 255.0f - mean[2]) / std[2],
    };

    // Maps a destination index to its two source taps and the weight of the
    // second tap. Positions outside the image collapse onto the border pixel.
    const auto sample_axis = [](int dst_index, float step, int src_len,
                                int& lo, int& hi, float& hi_weight) {
        float pos = (static_cast<float>(dst_index) + 0.5f) * step - 0.5f;
        pos = std::max(0.0f, std::min(pos, static_cast<float>(src_len - 1)));
        lo = static_cast<int>(pos);  // pos >= 0, so truncation equals floor
        hi = std::min(lo + 1, src_len - 1);
        hi_weight = pos - static_cast<float>(lo);
    };

    for (int dst_y = 0; dst_y < target_h; ++dst_y) {
        float* const row_ptr = tensor.data() + static_cast<std::size_t>(dst_y) * target_w;

        // Rows above or below the content are pure padding.
        if (dst_y < offset_y || dst_y >= offset_y + new_h) {
            for (int c = 0; c < 3; ++c) {
                std::fill_n(row_ptr + c * plane, target_w, bg[c]);
            }
            continue;
        }

        // The vertical taps are the same for the whole row.
        int y0 = 0;
        int y1 = 0;
        float wy = 0.0f;
        sample_axis(dst_y - offset_y, step_y, src_img.rows, y0, y1, wy);
        const uchar* const top_row = src_img.ptr<uchar>(y0);
        const uchar* const bottom_row = src_img.ptr<uchar>(y1);

        for (int dst_x = 0; dst_x < target_w; ++dst_x) {
            // Columns left or right of the content are padding.
            if (dst_x < offset_x || dst_x >= offset_x + new_w) {
                for (int c = 0; c < 3; ++c) {
                    row_ptr[dst_x + c * plane] = bg[c];
                }
                continue;
            }

            int x0 = 0;
            int x1 = 0;
            float wx = 0.0f;
            sample_axis(dst_x - offset_x, step_x, src_img.cols, x0, x1, wx);

            // Four neighbouring source pixels, stored as B, G, R bytes.
            const uchar* const p00 = top_row + x0 * 3;
            const uchar* const p10 = top_row + x1 * 3;
            const uchar* const p01 = bottom_row + x0 * 3;
            const uchar* const p11 = bottom_row + x1 * 3;

            for (int c = 0; c < 3; ++c) {
                const int src_c = 2 - c;  // output channel c (RGB) reads BGR channel 2 - c
                const float upper = p00[src_c] * (1.0f - wx) + p10[src_c] * wx;
                const float lower = p01[src_c] * (1.0f - wx) + p11[src_c] * wx;
                const float val = upper * (1.0f - wy) + lower * wy;

                row_ptr[dst_x + c * plane] = (val / 255.0f - mean[c]) / std[c];
            }
        }
    }

    return tensor;
}

实现4: OpenMP 多线程并行版（支持灰度图与 BGR 图）

#include <opencv2/opencv.hpp>
#include <vector>
#include <omp.h> // 需要开启 OpenMP 选项

/**
 * @brief Letterbox an 8-bit image into a square planar (CHW) float tensor,
 *        with the per-pixel work distributed over OpenMP threads.
 *
 * The image is scaled with a single ratio so that it fits inside
 * target_size x target_size, sampled bilinearly with pixel-centre alignment,
 * centred, and divided by 255. Everything outside the scaled image is 0.0f.
 * Channels are written in source order: a BGR input produces B, G, R planes;
 * no BGR -> RGB swap is performed (write to plane 2 - c instead of c if the
 * model expects RGB).
 *
 * @param src         Source image; must be a non-empty CV_8UC1 or CV_8UC3 matrix.
 * @param dst_data    Caller-allocated output holding at least
 *                    target_size * target_size * src.channels() floats.
 * @param target_size Side length of the square output (e.g. 640), positive.
 * @throws cv::Exception when a precondition above is violated.
 */
void cpu_preprocess_optimized(const cv::Mat& src, float* dst_data, int target_size) {
    // Reject inputs the sampling code cannot handle: an empty image divides
    // by zero below, and other depths/channel counts would be misread.
    CV_Assert(dst_data != nullptr && target_size > 0);
    CV_Assert(!src.empty() && (src.type() == CV_8UC1 || src.type() == CV_8UC3));

    const int src_w = src.cols;
    const int src_h = src.rows;
    const int channels = src.channels();  // 1 or 3, guaranteed by the check above

    // 1. Letterbox geometry.
    const float ratio = std::min(static_cast<float>(target_size) / src_w,
                                 static_cast<float>(target_size) / src_h);
    // Capped at target_size so float rounding can never push a write past
    // the end of a plane.
    const int new_w = std::min(target_size, static_cast<int>(std::round(src_w * ratio)));
    const int new_h = std::min(target_size, static_cast<int>(std::round(src_h * ratio)));

    // Padding that centres the scaled image.
    const int dw = (target_size - new_w) / 2;
    const int dh = (target_size - new_h) / 2;

    // Background is 0.0f in every plane.
    const std::size_t plane_area = static_cast<std::size_t>(target_size) * static_cast<std::size_t>(target_size);
    std::fill(dst_data, dst_data + plane_area * channels, 0.0f);

    const float scale = 1.0f / ratio;  // destination -> source step

    // 2. Each output pixel is independent, so both loops are collapsed into
    //    one parallel iteration space.
    #pragma omp parallel for collapse(2)
    for (int y = 0; y < new_h; ++y) {
        for (int x = 0; x < new_w; ++x) {
            // Pixel-centre mapping back into the source, clamped to the image
            // so the weights stay inside [0, 1] and border pixels replicate.
            float src_x = (x + 0.5f) * scale - 0.5f;
            float src_y = (y + 0.5f) * scale - 0.5f;
            src_x = std::max(0.0f, std::min(src_x, static_cast<float>(src_w - 1)));
            src_y = std::max(0.0f, std::min(src_y, static_cast<float>(src_h - 1)));

            const int x0 = static_cast<int>(std::floor(src_x));
            const int y0 = static_cast<int>(std::floor(src_y));
            const int x1 = std::min(x0 + 1, src_w - 1);
            const int y1 = std::min(y0 + 1, src_h - 1);

            const float lx = src_x - static_cast<float>(x0);
            const float ly = src_y - static_cast<float>(y0);
            const float hx = 1.0f - lx;
            const float hy = 1.0f - ly;

            const std::size_t dst_base_idx =
                static_cast<std::size_t>(y + dh) * target_size + static_cast<std::size_t>(x + dw);

            // ptr(row, col) accounts for the element size, so the same code
            // serves the 1-channel and the 3-channel case.
            const uint8_t* const p00 = src.ptr<uint8_t>(y0, x0);
            const uint8_t* const p01 = src.ptr<uint8_t>(y0, x1);
            const uint8_t* const p10 = src.ptr<uint8_t>(y1, x0);
            const uint8_t* const p11 = src.ptr<uint8_t>(y1, x1);

            for (int c = 0; c < channels; ++c) {
                const float res = (p00[c] * hx + p01[c] * lx) * hy + (p10[c] * hx + p11[c] * lx) * ly;
                // Interleaved HWC source -> planar CHW destination.
                dst_data[c * plane_area + dst_base_idx] = res / 255.0f;
            }
        }
    }
}
Logo

腾讯云面向开发者汇聚海量精品云计算使用和开发经验,营造开放的云计算技术生态圈。

更多推荐