OpenCV与深度学习:基于YOLOv5与MediaPipe框架的人体姿态检测
这段代码通过官方预训练的YOLOv5模型获取图像中的人体区域,将人体区域按比例缩放以后,再通过MediaPipe框架检测区域中的人体关键点。主包之前在这段代码的基础上做了一些改进,参加了2025年华东杯数学建模邀请赛A题:分析跳台滑雪比赛中优秀运动员的比赛影像,计算他们在各个阶段的姿态角,用CV算法的分析结果佐证我们用数学建模方法算出来的结果,最后也成功拿到了奖金(doge)。MediaPipe的精度没有那么好……
·
这段代码通过官方预训练的YOLOv5模型获取图像中的人体区域,将人体区域按比例缩放以后,再通过MediaPipe框架检测区域中的人体关键点。主包之前在这段代码的基础上做了一些改进,参加了2025年华东杯数学建模邀请赛A题:分析跳台滑雪比赛中优秀运动员的比赛影像,计算他们在各个阶段的姿态角,用CV算法的分析结果佐证我们用数学建模方法算出来的结果,最后也成功拿到了奖金(doge)。
import torch
import cv2
import os
import numpy as np
from datetime import datetime
# Load the pretrained YOLOv5s model from torch.hub and use it to detect people.
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', trust_repo=True)
# Keep only class index 0 (the "person" class this script is interested in).
model.classes = [0]
# Detection confidence setting used by the model when filtering its predictions.
model.conf = 0.5
# Import mediapipe only after YOLOv5 has finished loading; importing it earlier
# conflicts with YOLO and makes the program exit.
import mediapipe as mp
# Initialise the MediaPipe components: the pose solution and its drawing helpers.
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils
# Create the folder that receives the annotated image (~/Desktop/photo);
# exist_ok=True makes repeated runs a no-op.
photo_folder = os.path.join(os.path.expanduser("~/Desktop"), "photo")
os.makedirs(photo_folder, exist_ok=True)
# Aspect-ratio-preserving resize with centred padding
def adaptive_roi_resize(img, target_size=256):
    """Fit ``img`` into a square black canvas without distorting it.

    The longer side of ``img`` is scaled to ``target_size`` and the shorter
    side is scaled by the same factor; the result is pasted in the centre of
    a ``target_size`` x ``target_size`` canvas.

    Args:
        img: Image array whose first two axes are (height, width). The canvas
            is allocated with 3 channels and dtype ``uint8``, so a 3-channel
            image is expected.
        target_size: Side length of the square output canvas, in pixels.

    Returns:
        A tuple ``(canvas, (x_offset, y_offset, new_w, new_h))``: the padded
        image, followed by the top-left position and the size of the resized
        content inside that canvas.
    """
    h, w = img.shape[:2]
    scale = target_size / max(h, w)
    # int() truncates, so a very elongated input (e.g. 1 x 1000) would get a
    # 0-pixel short side, which cv2.resize rejects. Keep at least one pixel.
    new_h = max(1, int(h * scale))
    new_w = max(1, int(w * scale))
    resized = cv2.resize(img, (new_w, new_h))  # dsize is (width, height)
    canvas = np.zeros((target_size, target_size, 3), dtype=np.uint8)
    # Split the leftover space evenly so the content is centred.
    y_offset = (target_size - new_h) // 2
    x_offset = (target_size - new_w) // 2
    canvas[y_offset:y_offset + new_h, x_offset:x_offset + new_w] = resized
    return canvas, (x_offset, y_offset, new_w, new_h)
# Detect people in image.png, estimate each person's pose, and save the
# annotated image into photo_folder.

# Side length of the square canvas handed to MediaPipe. The same value is
# needed again to turn normalised landmark coordinates back into canvas
# pixels, so it is defined once instead of repeating the literal 256.
ROI_SIZE = 256

# Read the input image
frame = cv2.imread('image.png')
if frame is None:
    print("无法读取图像 image.png")
else:
    # Reverse the channel axis (OpenCV's BGR -> RGB) before running detection.
    results = model(frame[:, :, ::-1], size=640)
    # One row per detection: x1, y1, x2, y2, confidence, class.
    detections = results.pred[0].cpu().numpy()
    marked_frame = frame.copy()
    h, w = frame.shape[:2]
    # NOTE: the segmentation output enabled here is not used below.
    with mp_pose.Pose(
        static_image_mode=True,
        model_complexity=2,
        enable_segmentation=True,
        min_detection_confidence=0.5
    ) as pose:
        for det in detections:
            x1, y1, x2, y2 = det[:4]
            box_w = x2 - x1
            box_h = y2 - y1
            # Grow the box on every side; the margin increases with the
            # fraction of the frame height that the person occupies.
            expand_ratio = 0.8 + (box_h / h) * 0.5
            x1_exp = max(0, int(x1 - box_w * expand_ratio))
            y1_exp = max(0, int(y1 - box_h * expand_ratio))
            x2_exp = min(w, int(x2 + box_w * expand_ratio))
            y2_exp = min(h, int(y2 + box_h * expand_ratio))
            person_roi = frame[y1_exp:y2_exp, x1_exp:x2_exp]
            if person_roi.size == 0:
                continue
            resized_roi, (pad_x, pad_y, roi_w, roi_h) = adaptive_roi_resize(
                person_roi, ROI_SIZE
            )
            results_pose = pose.process(
                cv2.cvtColor(resized_roi, cv2.COLOR_BGR2RGB)
            )
            if not results_pose.pose_landmarks:
                continue
            # Factors that map resized-content pixels back to ROI pixels.
            scale_x = (x2_exp - x1_exp) / roi_w
            scale_y = (y2_exp - y1_exp) / roi_h
            # Convert every landmark once: normalised canvas coordinate ->
            # canvas pixel -> remove padding -> undo resize -> frame pixel.
            points = [
                (
                    int((lm.x * ROI_SIZE - pad_x) * scale_x + x1_exp),
                    int((lm.y * ROI_SIZE - pad_y) * scale_y + y1_exp),
                )
                for lm in results_pose.pose_landmarks.landmark
            ]
            in_frame = [0 <= px < w and 0 <= py < h for px, py in points]
            # Draw the skeleton; a connection is drawn only when both of its
            # end points fall inside the frame.
            for start_idx, end_idx in mp_pose.POSE_CONNECTIONS:
                if in_frame[start_idx] and in_frame[end_idx]:
                    cv2.line(marked_frame, points[start_idx], points[end_idx],
                             (0, 255, 0), 2)
            # Draw the key points
            for point, visible in zip(points, in_frame):
                if visible:
                    cv2.circle(marked_frame, point, 4, (0, 200, 255), -1)
            # Draw the expanded ROI border.
            # NOTE(review): the original indentation was lost, so it is
            # unclear whether the border was meant for every detection or
            # only for those with landmarks; confirm the intended level.
            cv2.rectangle(marked_frame,
                          (x1_exp, y1_exp), (x2_exp, y2_exp),
                          (0, 255, 0), 2, cv2.LINE_AA)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_path = os.path.join(photo_folder, f"result_{timestamp}.jpg")
    # Check imwrite's return value so a failed write is not reported as a
    # successful save.
    if cv2.imwrite(output_path, marked_frame):
        print(f"结果已保存至:{output_path}")
    else:
        print(f"图像保存失败:{output_path}")
MediaPipe的精度没有那么好:如果追求推理速度,可以用这个模型;如果对精度要求更高,可以试一下YOLO-pose系列。
更多推荐
所有评论(0)