这段代码先用官方预训练的 YOLOv5 模型检测图像中的人体区域,将该区域按比例缩放后,再用 MediaPipe 框架检测区域内的人体关键点。笔者曾在这段代码的基础上做了一些改进,用于参加 2025 年华东杯数学建模邀请赛 A 题:分析跳台滑雪比赛中优秀运动员的比赛影像,计算他们在各个阶段的姿态角,并用计算机视觉算法的分析结果佐证数学建模方法得到的结果,最后也成功拿到了奖金(doge)。

import torch
import cv2
import os
import numpy as np
from datetime import datetime

# Load the YOLOv5 model used to detect people
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', trust_repo=True)
model.classes = [0]  # keep only class index 0 (presumably the COCO "person" class -- confirm)
model.conf = 0.5  # presumably the minimum detection confidence kept -- confirm against YOLOv5 docs

# Import only after YOLOv5 has finished loading; per the original author,
# importing it earlier conflicts with YOLO and makes the program exit
import mediapipe as mp

# Initialise the MediaPipe components
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

# Create the directory where annotated images are saved
photo_folder = os.path.join(os.path.expanduser("~/Desktop"), "photo")
os.makedirs(photo_folder, exist_ok=True)

def adaptive_roi_resize(img, target_size=256):
    """Resize *img* to fit a square canvas while keeping its aspect ratio.

    The longer side of the image is scaled to ``target_size`` pixels, the
    shorter side is scaled by the same factor, and the result is centred on
    a black ``target_size`` x ``target_size`` 3-channel ``uint8`` canvas.

    Args:
        img: Image array whose first two dimensions are (height, width).
        target_size: Side length of the square output canvas, in pixels.

    Returns:
        A tuple ``(canvas, (x_offset, y_offset, new_w, new_h))`` where
        ``canvas`` is the padded image, the offsets are the top-left corner
        of the resized image inside the canvas, and ``new_w`` / ``new_h``
        are the resized image's dimensions.
    """
    h, w = img.shape[:2]
    scale = target_size / max(h, w)
    # Clamp to at least one pixel: for very elongated inputs int() truncates
    # the short side to 0, and cv2.resize rejects a zero-sized target.
    new_h = max(1, int(h * scale))
    new_w = max(1, int(w * scale))
    resized = cv2.resize(img, (new_w, new_h))  # dsize is (width, height)
    canvas = np.zeros((target_size, target_size, 3), dtype=np.uint8)
    # Centre the resized image; the remaining border stays black.
    y_offset = (target_size - new_h) // 2
    x_offset = (target_size - new_w) // 2
    canvas[y_offset:y_offset + new_h, x_offset:x_offset + new_w] = resized
    return canvas, (x_offset, y_offset, new_w, new_h)

# Side length (pixels) of the square canvas handed to MediaPipe. The same value
# is needed to undo the landmark normalisation, so it is defined exactly once.
ROI_SIZE = 256


def _landmark_to_frame(landmark, pad, scale, origin):
    """Map one landmark from the padded ROI canvas back to frame pixels.

    The landmark's ``x`` / ``y`` are treated as fractions of the ROI_SIZE
    canvas: the padding offset is removed, the ROI resize is undone with
    ``scale``, and the ROI's top-left corner ``origin`` is added back.
    Returns integer ``(x, y)`` coordinates in the full frame.
    """
    frame_x = int((landmark.x * ROI_SIZE - pad[0]) * scale[0] + origin[0])
    frame_y = int((landmark.y * ROI_SIZE - pad[1]) * scale[1] + origin[1])
    return frame_x, frame_y


# Read the input image
frame = cv2.imread('image.png')
if frame is None:
    print("无法读取图像 image.png")
else:
    # cv2 loads BGR; reverse the channel axis so the detector receives RGB.
    results = model(frame[:, :, ::-1], size=640)
    detections = results.pred[0].cpu().numpy()

    marked_frame = frame.copy()
    h, w = frame.shape[:2]

    # NOTE(review): the segmentation output is never read below; consider
    # disabling enable_segmentation if it is not needed.
    with mp_pose.Pose(
        static_image_mode=True,
        model_complexity=2,
        enable_segmentation=True,
        min_detection_confidence=0.5
    ) as pose:
        for det in detections:
            # Only the box corners are used; the remaining columns are ignored.
            x1, y1, x2, y2 = det[:4]
            box_w = x2 - x1
            box_h = y2 - y1
            # Boxes that are taller relative to the frame get a larger margin.
            expand_ratio = 0.8 + (box_h / h) * 0.5

            # Grow the box on every side and clip it to the frame bounds.
            x1_exp = max(0, int(x1 - box_w * expand_ratio))
            y1_exp = max(0, int(y1 - box_h * expand_ratio))
            x2_exp = min(w, int(x2 + box_w * expand_ratio))
            y2_exp = min(h, int(y2 + box_h * expand_ratio))

            person_roi = frame[y1_exp:y2_exp, x1_exp:x2_exp]
            if person_roi.size == 0:
                continue

            resized_roi, (pad_x, pad_y, roi_w, roi_h) = adaptive_roi_resize(
                person_roi, target_size=ROI_SIZE)
            results_pose = pose.process(cv2.cvtColor(resized_roi, cv2.COLOR_BGR2RGB))

            if not results_pose.pose_landmarks:
                continue

            # Factors that undo the ROI resize (canvas pixels -> frame pixels).
            scale = ((x2_exp - x1_exp) / roi_w, (y2_exp - y1_exp) / roi_h)
            # Project every landmark once; lines and dots share the result.
            points = [
                _landmark_to_frame(lm, (pad_x, pad_y), scale, (x1_exp, y1_exp))
                for lm in results_pose.pose_landmarks.landmark
            ]

            # Draw the skeleton; a segment is skipped unless both ends are in-frame.
            for start_idx, end_idx in mp_pose.POSE_CONNECTIONS:
                start_x, start_y = points[start_idx]
                end_x, end_y = points[end_idx]
                if 0 <= start_x < w and 0 <= start_y < h and 0 <= end_x < w and 0 <= end_y < h:
                    cv2.line(marked_frame, (start_x, start_y), (end_x, end_y), (0, 255, 0), 2)

            # Draw the keypoints that fall inside the frame
            for abs_x, abs_y in points:
                if 0 <= abs_x < w and 0 <= abs_y < h:
                    cv2.circle(marked_frame, (abs_x, abs_y), 4, (0, 200, 255), -1)

            # Draw the expanded ROI rectangle
            cv2.rectangle(marked_frame,
                          (x1_exp, y1_exp), (x2_exp, y2_exp),
                          (0, 255, 0), 2, cv2.LINE_AA)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_path = os.path.join(photo_folder, f"result_{timestamp}.jpg")
    # cv2.imwrite signals failure through its return value instead of raising,
    # so check it before reporting success.
    if cv2.imwrite(output_path, marked_frame):
        print(f"结果已保存至:{output_path}")
    else:
        print(f"保存失败:{output_path}")

MediaPipe 的精度没有那么高:如果追求推理速度,可以使用这个模型;如果对精度要求更高,可以试一下 YOLO-pose 系列。

Logo

腾讯云面向开发者汇聚海量精品云计算使用和开发经验,营造开放的云计算技术生态圈。

更多推荐