opencv-python,一个惊艳的 Python 库!
OpenCV-Python是计算机视觉领域的强大开源工具库,提供2500+优化算法,支持图像处理、目标识别等功能。本文详细介绍了其安装方法、基础操作(图像处理、视频分析、几何变换等)和高级应用(边缘检测、人脸识别、图像增强)。通过三个实战项目展示其应用价值:文档扫描矫正、智能安防监控和颜色追踪系统。OpenCV-Python凭借与NumPy的无缝集成和高效性能,成为计算机视觉开发的理想选择。文章鼓励读者动手实践,并分享自己的创意与项目经验。
一、库的简介:计算机视觉的魔法工具箱
想象一下,你的手机能够自动识别照片中的人脸并添加有趣的滤镜,停车场系统能够无感识别车牌自动抬杆,工厂流水线上的摄像头能够实时检测产品缺陷并自动分拣——这些看似科幻的场景,背后都离不开计算机视觉技术。而OpenCV-Python,正是让Python开发者能够轻松驾驭计算机视觉魔法的惊艳工具。
OpenCV(Open Source Computer Vision Library)由Intel于1999年创立,2000年发布首个版本,如今已发展成为计算机视觉领域最权威的开源库。它拥有超过2500种优化算法,涵盖图像处理、特征检测、目标识别、机器学习等各个领域。OpenCV-Python作为其Python API,完美结合了OpenCV底层C++的高效性能和Python语言的简洁易用性,所有计算密集型操作都在后台用C++执行,保证了与原始C++代码一样的速度。
在实际生活中,OpenCV-Python的应用无处不在:
- 智能安防系统:通过摄像头实时监测异常行为、人脸识别门禁、烟火检测预警
- 医疗影像辅助:自动分析X光片、CT图像中的病灶区域,辅助医生诊断
- 工业质检自动化:高速生产线上检测产品缺陷、测量尺寸精度、识别条码二维码
- 交通管理:车牌识别、违章抓拍、车流量统计、智能红绿灯控制
- 增强现实:实时追踪标记物位置,在视频中叠加虚拟物体
- 日常生活:照片美化、文档扫描矫正、手势控制设备、运动姿态分析
OpenCV-Python的核心优势在于其与NumPy的无缝集成——所有图像数据都被表示为NumPy数组,这意味着你可以直接使用NumPy的强大功能进行矩阵运算,也可以与SciPy、Matplotlib等科学计算库协同工作。这种设计让图像处理变得像操作普通数组一样简单直观。
二、安装OpenCV-Python
安装OpenCV-Python非常简单,通过pip即可完成:
bash
# 注意:以下三个包互相冲突,同一环境中只需安装其中一个
# 安装核心库(包含主要模块)
pip install opencv-python
# 安装扩展模块(包含SIFT、SURF等专利算法)
pip install opencv-contrib-python
# 如果只需要基础功能,无GUI依赖(适合服务器环境)
pip install opencv-python-headless
由于OpenCV库较大(几十到几百MB),如果下载速度慢,可以使用国内镜像源:
bash
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple opencv-python
验证安装是否成功:
python
import cv2
import numpy as np
# Report the versions of the two libraries imported above.
opencv_version = cv2.__version__
numpy_version = np.__version__
print(f"OpenCV版本: {opencv_version}")
print(f"NumPy版本: {numpy_version}")

# Query the CUDA-enabled device count when the `cuda` submodule is present,
# otherwise print the fallback text.
if hasattr(cv2, 'cuda'):
    gpu_support = cv2.cuda.getCudaEnabledDeviceCount()
else:
    gpu_support = '不支持'
print(f"支持GPU加速: {gpu_support}")
三、基本用法:四步掌握OpenCV-Python
1. 图像的读取、显示与保存
OpenCV中图像的本质是一个NumPy数组,这使得操作异常灵活。
python
import cv2
import matplotlib.pyplot as plt
# 1.1 Read the image.
# cv2.IMREAD_COLOR: colour mode (default), returns three BGR channels
# cv2.IMREAD_GRAYSCALE: grayscale mode, returns a single channel
# cv2.IMREAD_UNCHANGED: keeps the alpha channel when present
img_color = cv2.imread('example.jpg', cv2.IMREAD_COLOR)
img_gray = cv2.imread('example.jpg', cv2.IMREAD_GRAYSCALE)

# cv2.imread reports failure by returning None instead of raising, so both
# results are validated before any attribute access.
if img_color is None or img_gray is None:
    print("图像读取失败,请检查文件路径")
    # `exit()` is a helper injected by the `site` module for interactive
    # sessions; raising SystemExit works everywhere and signals failure.
    raise SystemExit(1)

print(f"彩色图像维度: {img_color.shape}")  # (height, width, channels)
print(f"灰度图像维度: {img_gray.shape}")  # (height, width)
print(f"图像数据类型: {img_color.dtype}")  # uint8

# 1.2 Display with Matplotlib.
# OpenCV stores colour images as BGR while Matplotlib expects RGB, so the
# channel order is converted before plotting.
img_rgb = cv2.cvtColor(img_color, cv2.COLOR_BGR2RGB)

plt.figure(figsize=(12, 4))

# Two images are shown, so a 1x2 grid is used.
plt.subplot(1, 2, 1)
plt.imshow(img_rgb)
plt.title('彩色图像 (RGB)')
plt.axis('off')

plt.subplot(1, 2, 2)
plt.imshow(img_gray, cmap='gray')
plt.title('灰度图像')
plt.axis('off')

plt.show()

# 1.3 Display in an OpenCV window (any key closes it).
cv2.imshow('OpenCV窗口 - 彩色图像', img_color)
cv2.waitKey(0)  # argument is the wait in milliseconds; 0 waits indefinitely
cv2.destroyAllWindows()

# 1.4 Save the image.
cv2.imwrite('output.jpg', img_color, [cv2.IMWRITE_JPEG_QUALITY, 90])  # explicit JPEG quality
cv2.imwrite('output.png', img_color)  # PNG format
2. 视频处理与摄像头调用
python
import cv2
def process_video_demo():
    """Open the default camera and show original, grayscale and mirrored frames.

    Runs until a frame cannot be read or the user presses 'q'. Returns None.
    The capture device and all windows are released even if an error occurs
    while processing a frame.
    """
    # 2.1 Open the camera (index 0 selects the first camera).
    cap = cv2.VideoCapture(0)
    # 2.2 Alternatively open a video file:
    # cap = cv2.VideoCapture('video.mp4')

    if not cap.isOpened():
        print("无法打开摄像头")
        return

    try:
        # 2.3 Request capture parameters, then print the values actually
        # reported back by the capture object.
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
        cap.set(cv2.CAP_PROP_FPS, 30)
        print(f"摄像头分辨率: {cap.get(cv2.CAP_PROP_FRAME_WIDTH)}x{cap.get(cv2.CAP_PROP_FRAME_HEIGHT)}")

        # 2.4 Frame loop.
        while True:
            ret, frame = cap.read()
            # `ret` is False when no frame could be grabbed.
            if not ret:
                print("无法获取视频帧")
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            flipped = cv2.flip(frame, 1)  # horizontal flip (mirror effect)

            cv2.imshow('Original', frame)
            cv2.imshow('Grayscale', gray)
            cv2.imshow('Flipped', flipped)

            # Press 'q' to quit.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # 2.5 Release resources on every exit path.
        cap.release()
        cv2.destroyAllWindows()
# 视频录制示例
def record_video_demo():
    """Record frames from the default camera to 'output.mp4' until 'q' is pressed.

    The writer is created from the size of the first captured frame rather
    than a hard-coded (640, 480), because frames whose size differs from the
    size given to the writer are not written correctly. Returns None.
    """
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("无法打开摄像头")
        return

    # Codec for the output file ('XVID' is an alternative).
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = None
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if out is None:
                # Create the writer lazily so its frame size matches the camera.
                frame_height, frame_width = frame.shape[:2]
                out = cv2.VideoWriter('output.mp4', fourcc, 20.0,
                                      (frame_width, frame_height))

            out.write(frame)
            cv2.imshow('Recording...', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release everything on every exit path so the file is finalised.
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
3. 图像几何变换
python
import cv2
import numpy as np
def geometry_transform_demo(img):
    """Apply resize, rotate, translate, flip and crop to `img` and plot the results.

    Args:
        img: image array; the plotting step converts every result with
            COLOR_BGR2RGB, so a 3-channel BGR image is expected.

    Returns:
        None. The results are shown in a 2x3 Matplotlib grid.
    """
    # Imported locally because the surrounding snippet only imports cv2 and numpy.
    import matplotlib.pyplot as plt

    height, width = img.shape[:2]

    # 3.1 Scaling
    # Option 1: explicit target size (width, height).
    resized = cv2.resize(img, (width // 2, height // 2))
    # Option 2: scale factors (shown for reference; not plotted).
    resized_by_scale = cv2.resize(img, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_LINEAR)

    # 3.2 Rotation by 45 degrees around the image centre, scale 1.0.
    center = (width // 2, height // 2)
    rotation_matrix = cv2.getRotationMatrix2D(center, 45, 1.0)
    rotated = cv2.warpAffine(img, rotation_matrix, (width, height))

    # 3.3 Translation: 100 px right, 50 px down.
    translation_matrix = np.float32([[1, 0, 100], [0, 1, 50]])
    translated = cv2.warpAffine(img, translation_matrix, (width, height))

    # 3.4 Flips (only the horizontal flip is plotted).
    flipped_h = cv2.flip(img, 1)      # horizontal
    flipped_v = cv2.flip(img, 0)      # vertical
    flipped_both = cv2.flip(img, -1)  # both axes

    # 3.5 Crop with array slicing: [y_start:y_end, x_start:x_end].
    cropped = img[100:400, 200:500]
    if cropped.size == 0:
        # The fixed window lies outside small images; an empty array would
        # make cvtColor fail below, so fall back to the full image.
        cropped = img

    # Plot the results.
    titles = ['Original', 'Resized', 'Rotated', 'Translated', 'Flipped H', 'Cropped']
    images = [img, resized, rotated, translated, flipped_h, cropped]

    for position, (title, image) in enumerate(zip(titles, images), start=1):
        plt.subplot(2, 3, position)
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        plt.title(title)
        plt.axis('off')

    plt.show()
4. 绘图与文字标注
python
import cv2
import numpy as np
def drawing_demo():
    """Draw a set of shapes and a caption on a black 512x512 canvas and return it."""
    canvas = np.zeros((512, 512, 3), dtype=np.uint8)

    # Colours are BGR tuples; a thickness of -1 fills the shape.
    white = (255, 255, 255)

    # 4.1 Line (blue)
    cv2.line(canvas, (50, 50), (450, 50), (255, 0, 0), 3)

    # 4.2 Rectangles: green outline, then red filled
    cv2.rectangle(canvas, (100, 100), (300, 200), (0, 255, 0), 2)
    cv2.rectangle(canvas, (350, 100), (450, 200), (0, 0, 255), -1)

    # 4.3 Circles: cyan outline, then magenta filled
    cv2.circle(canvas, (200, 300), 50, (255, 255, 0), 3)
    cv2.circle(canvas, (400, 300), 40, (255, 0, 255), -1)

    # 4.4 Ellipse: axes (100, 50), no rotation, full 0-360 degree arc
    cv2.ellipse(canvas, (256, 400), (100, 50), 0, 0, 360, (0, 255, 255), 2)

    # 4.5 Closed polygon; the vertices are reshaped to (N, 1, 2)
    vertices = np.array([[50, 450], [150, 480], [200, 430], [250, 470]], np.int32)
    vertices = vertices.reshape((-1, 1, 2))
    cv2.polylines(canvas, [vertices], True, white, 2)

    # 4.6 Caption
    cv2.putText(canvas, 'OpenCV Drawing', (150, 480),
                cv2.FONT_HERSHEY_SIMPLEX, 1, white, 2)

    return canvas
四、高级用法
1. 边缘检测与图像分割
python
import cv2
import numpy as np
import matplotlib.pyplot as plt
class EdgeDetectionDemo:
    """Edge detection, contour analysis and watershed segmentation on one image.

    Attributes:
        img: the image loaded with cv2.imread (BGR).
        gray: grayscale conversion of `img`.
    """

    def __init__(self, image_path):
        """Load `image_path` and cache its grayscale version.

        Raises:
            FileNotFoundError: if cv2.imread cannot read the file (it returns
                None instead of raising).
        """
        self.img = cv2.imread(image_path)
        if self.img is None:
            raise FileNotFoundError(f"Cannot read image: {image_path}")
        self.gray = cv2.cvtColor(self.img, cv2.COLOR_BGR2GRAY)

    def canny_edge_detection(self):
        """Run Canny with fixed thresholds and with Otsu-derived thresholds.

        Returns:
            (edges, edges_otsu): edge maps from the fixed 50/150 thresholds on
            the blurred image, and from thresholds derived from the Otsu value
            on the unblurred grayscale image.
        """
        # Smooth first so noise does not produce spurious edges.
        blurred = cv2.GaussianBlur(self.gray, (5, 5), 1.5)

        # Low/high thresholds, commonly chosen at a 1:2 or 1:3 ratio.
        edges = cv2.Canny(blurred, 50, 150)

        # cv2.threshold returns (threshold_value, binarised_image). With
        # THRESH_OTSU the first element is the automatically chosen threshold;
        # that scalar, not the image, must be handed to Canny.
        otsu_value, _ = cv2.threshold(self.gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        edges_otsu = cv2.Canny(self.gray, otsu_value * 0.4, otsu_value)

        return edges, edges_otsu

    def sobel_laplacian_demo(self):
        """Compute Sobel gradients, their magnitude, and the Laplacian.

        Returns:
            (sobel_x, sobel_y, sobel_mag, laplacian): the two float64
            gradients, and uint8 images of the gradient magnitude and of the
            absolute Laplacian, both clipped to 0-255.
        """
        # First derivatives, in float64 so negative slopes are preserved.
        sobel_x = cv2.Sobel(self.gray, cv2.CV_64F, 1, 0, ksize=3)
        sobel_y = cv2.Sobel(self.gray, cv2.CV_64F, 0, 1, ksize=3)

        sobel_mag = np.sqrt(sobel_x**2 + sobel_y**2)
        sobel_mag = np.uint8(np.clip(sobel_mag, 0, 255))

        # Second derivative. Clip before the uint8 cast: a bare cast wraps
        # values above 255 around instead of saturating them.
        laplacian = cv2.Laplacian(self.gray, cv2.CV_64F)
        laplacian = np.uint8(np.clip(np.abs(laplacian), 0, 255))

        return sobel_x, sobel_y, sobel_mag, laplacian

    def contour_detection(self):
        """Find contours in a fixed-threshold binarisation and extract features.

        Returns:
            (contour_img, features): a copy of the image with all contours,
            bounding boxes and centroids drawn, and a list with one dict per
            contour whose area is at least 100.
        """
        _, binary = cv2.threshold(self.gray, 127, 255, cv2.THRESH_BINARY)

        contours, hierarchy = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

        contour_img = self.img.copy()
        cv2.drawContours(contour_img, contours, -1, (0, 255, 0), 2)

        features = []
        for i, contour in enumerate(contours):
            area = cv2.contourArea(contour)
            # Skip small contours (noise) before any further work.
            if area < 100:
                continue

            perimeter = cv2.arcLength(contour, True)
            x, y, w, h = cv2.boundingRect(contour)
            (cx, cy), radius = cv2.minEnclosingCircle(contour)

            # Centroid from image moments; m00 can be zero.
            M = cv2.moments(contour)
            if M['m00'] != 0:
                centroid_x = int(M['m10'] / M['m00'])
                centroid_y = int(M['m01'] / M['m00'])
            else:
                centroid_x, centroid_y = 0, 0

            features.append({
                'index': i,
                'area': area,
                'perimeter': perimeter,
                'bounding_rect': (x, y, w, h),
                'centroid': (centroid_x, centroid_y),
                'aspect_ratio': w / h if h > 0 else 0,
                # Previously computed and then discarded; exposed as an extra key.
                'min_enclosing_circle': ((cx, cy), radius),
            })

            # Annotate the bounding box and centroid.
            cv2.rectangle(contour_img, (x, y), (x+w, y+h), (255, 0, 0), 1)
            cv2.circle(contour_img, (centroid_x, centroid_y), 3, (0, 0, 255), -1)

        return contour_img, features

    def watershed_segmentation(self):
        """Segment the image with the watershed algorithm.

        Returns:
            A copy of the image in which pixels that cv2.watershed labelled -1
            (region boundaries) are painted red.
        """
        # Inverted Otsu binarisation.
        _, binary = cv2.threshold(self.gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

        # Morphological opening removes small noise.
        kernel = np.ones((3, 3), np.uint8)
        opening = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel, iterations=2)

        # Sure background: the dilated foreground mask.
        sure_bg = cv2.dilate(opening, kernel, iterations=3)

        # Sure foreground: pixels whose distance transform value exceeds 70%
        # of the maximum.
        dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
        _, sure_fg = cv2.threshold(dist_transform, 0.7 * dist_transform.max(), 255, 0)

        # Unknown region: background minus sure foreground.
        sure_fg = np.uint8(sure_fg)
        unknown = cv2.subtract(sure_bg, sure_fg)

        # Label the foreground components, shift every label up by one, then
        # set the unknown region to 0 for the watershed call.
        _, markers = cv2.connectedComponents(sure_fg)
        markers = markers + 1
        markers[unknown == 255] = 0

        markers = cv2.watershed(self.img, markers)

        # Mark boundaries in red (BGR).
        segmented = self.img.copy()
        segmented[markers == -1] = [0, 0, 255]

        return segmented
2. 人脸检测与特征识别
python
import cv2
import numpy as np
class FaceDetectionDemo:
    """Face and eye detection with OpenCV's bundled Haar cascade classifiers."""

    def __init__(self):
        """Load the pre-trained face, eye and smile cascades.

        Raises:
            RuntimeError: if a cascade file cannot be loaded.
        """
        self.face_cascade = self._load_cascade('haarcascade_frontalface_default.xml')
        self.eye_cascade = self._load_cascade('haarcascade_eye.xml')
        self.smile_cascade = self._load_cascade('haarcascade_smile.xml')

    @staticmethod
    def _load_cascade(filename):
        """Load one cascade from cv2.data.haarcascades and verify it is usable."""
        cascade = cv2.CascadeClassifier(cv2.data.haarcascades + filename)
        # An unreadable file yields an empty classifier rather than an
        # exception, so it is checked here instead of failing later.
        if cascade.empty():
            raise RuntimeError(f"Failed to load cascade file: {filename}")
        return cascade

    def detect_faces(self, img, draw=True):
        """Detect faces in a BGR image.

        Args:
            img: BGR image; it is not modified.
            draw: when True, face and eye rectangles are drawn on the
                returned copy.

        Returns:
            (result_img, face_regions): an annotated copy of `img`, and a list
            of dicts with 'bbox' (x, y, w, h) and 'roi' (a view into `img`).
        """
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        faces = self.face_cascade.detectMultiScale(
            gray,
            scaleFactor=1.1,   # image scale step between detection passes
            minNeighbors=5,    # neighbours a candidate needs to be kept
            minSize=(30, 30),  # smallest detectable face
            flags=cv2.CASCADE_SCALE_IMAGE
        )

        result_img = img.copy()
        face_regions = []

        for (x, y, w, h) in faces:
            face_regions.append({
                'bbox': (x, y, w, h),
                'roi': img[y:y+h, x:x+w]
            })

            if not draw:
                # Eye detections are only ever drawn, so skip the extra
                # detection pass when nothing is rendered.
                continue

            cv2.rectangle(result_img, (x, y), (x+w, y+h), (255, 0, 0), 2)
            # Keep the label inside the image for faces touching the top edge.
            cv2.putText(result_img, 'Face', (x, max(y-10, 10)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)

            # Search for eyes inside the face region only.
            roi_gray = gray[y:y+h, x:x+w]
            # Draw on the result copy: slicing `img` here would mutate the
            # caller's image and leave the eyes out of the returned picture.
            roi_result = result_img[y:y+h, x:x+w]
            eyes = self.eye_cascade.detectMultiScale(roi_gray)
            for (ex, ey, ew, eh) in eyes:
                cv2.rectangle(roi_result, (ex, ey), (ex+ew, ey+eh), (0, 255, 0), 1)

        return result_img, face_regions

    def detect_from_camera(self):
        """Run face detection on the default camera until 'q' is pressed."""
        cap = cv2.VideoCapture(0)
        if not cap.isOpened():
            print("无法打开摄像头")
            return

        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                result_frame, faces = self.detect_faces(frame)
                cv2.imshow('Face Detection - Press q to quit', result_frame)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Release the camera and windows on every exit path.
            cap.release()
            cv2.destroyAllWindows()

    @staticmethod
    def create_face_blur(img, faces):
        """Return a copy of `img` with every detected face Gaussian-blurred.

        Args:
            img: source image; it is not modified.
            faces: iterable of dicts with a 'bbox' key (x, y, w, h), as
                returned by detect_faces.
        """
        result = img.copy()

        for face in faces:
            x, y, w, h = face['bbox']
            face_roi = result[y:y+h, x:x+w]
            # A degenerate or out-of-bounds box gives an empty slice, which
            # GaussianBlur cannot process.
            if face_roi.size == 0:
                continue
            # Blur the region and write it back in place.
            result[y:y+h, x:x+w] = cv2.GaussianBlur(face_roi, (99, 99), 30)

        return result
3. 图像滤波与增强
python
import cv2
import numpy as np
class ImageFilterDemo:
    """Image filtering, morphology and contrast-enhancement demonstrations."""

    @staticmethod
    def filter_demo(img):
        """Apply several smoothing filters and a sharpening kernel to `img`.

        Returns:
            dict mapping 'original', 'blur', 'gaussian', 'median', 'bilateral'
            and 'sharpened' to the corresponding image.
        """
        # Mean (box) filter.
        blur = cv2.blur(img, (5, 5))

        # Gaussian filter (weighted average).
        gaussian = cv2.GaussianBlur(img, (5, 5), 0)

        # Median filter (effective against salt-and-pepper noise).
        median = cv2.medianBlur(img, 5)

        # Bilateral filter (denoises while keeping edges).
        bilateral = cv2.bilateralFilter(img, 9, 75, 75)

        # Sharpening kernel; the weights sum to 1.
        kernel_sharpen = np.array([[-1, -1, -1],
                                   [-1, 9, -1],
                                   [-1, -1, -1]])
        sharpened = cv2.filter2D(img, -1, kernel_sharpen)

        return {
            'original': img,
            'blur': blur,
            'gaussian': gaussian,
            'median': median,
            'bilateral': bilateral,
            'sharpened': sharpened
        }

    @staticmethod
    def morphological_operations(img):
        """Binarise a BGR image and apply the basic morphological operators.

        Returns:
            dict mapping 'binary', 'erosion', 'dilation', 'opening', 'closing'
            and 'gradient' to the corresponding binary image.
        """
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

        # 5x5 square structuring element.
        kernel = np.ones((5, 5), np.uint8)

        erosion = cv2.erode(binary, kernel, iterations=1)
        dilation = cv2.dilate(binary, kernel, iterations=1)

        # Opening (erode then dilate) removes small noise.
        opening = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)

        # Closing (dilate then erode) fills small holes.
        closing = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)

        # Morphological gradient (dilation minus erosion) extracts boundaries.
        gradient = cv2.morphologyEx(binary, cv2.MORPH_GRADIENT, kernel)

        return {
            'binary': binary,
            'erosion': erosion,
            'dilation': dilation,
            'opening': opening,
            'closing': closing,
            'gradient': gradient
        }

    @staticmethod
    def histogram_equalization(img):
        """Enhance contrast with global equalisation and with CLAHE.

        Both methods work on the luma (Y) channel of the YUV representation
        so the colour channels are left untouched.

        Returns:
            (equalized, clahe_result): two BGR images, each derived
            independently from the original luma channel.
        """
        img_yuv = cv2.cvtColor(img, cv2.COLOR_BGR2YUV)

        # Keep a copy of the original luma: the Y channel is overwritten
        # below, and CLAHE must start from the original values rather than
        # from the globally equalised ones.
        luma = img_yuv[:, :, 0].copy()

        # Global histogram equalisation.
        img_yuv[:, :, 0] = cv2.equalizeHist(luma)
        equalized = cv2.cvtColor(img_yuv, cv2.COLOR_YUV2BGR)

        # Contrast-limited adaptive histogram equalisation (CLAHE).
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        img_yuv[:, :, 0] = clahe.apply(luma)
        clahe_result = cv2.cvtColor(img_yuv, cv2.COLOR_YUV2BGR)

        return equalized, clahe_result
五、实际应用场景
场景一:文档扫描与矫正工具
python
import cv2
import numpy as np
class DocumentScanner:
    """Document scanning with contour detection and perspective correction.

    Attributes:
        original: the most recently loaded input image, or None.
        processed: the binarised result of the last successful scan, or None.
    """

    def __init__(self):
        self.original = None
        self.processed = None

    def preprocess_image(self, img):
        """Return a Canny edge map of a BGR image (grayscale, blur, Canny)."""
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        edged = cv2.Canny(blurred, 50, 150)
        return edged

    def find_document_contour(self, edged_img, original_img):
        """Find the largest contour that approximates a quadrilateral.

        Args:
            edged_img: edge map to search.
            original_img: unused; kept so existing callers keep working.

        Returns:
            The 4-point polygon approximation, or None if none of the five
            largest contours reduces to four vertices.
        """
        contours, _ = cv2.findContours(edged_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Only the five largest contours are considered.
        contours = sorted(contours, key=cv2.contourArea, reverse=True)[:5]

        for contour in contours:
            peri = cv2.arcLength(contour, True)
            # Polygon approximation with a tolerance of 2% of the perimeter.
            approx = cv2.approxPolyDP(contour, 0.02 * peri, True)
            # Four vertices are taken to be the document outline.
            if len(approx) == 4:
                return approx

        return None

    def order_points(self, pts):
        """Order four points as top-left, top-right, bottom-right, bottom-left.

        Args:
            pts: four (x, y) points as an array or a nested sequence.

        Returns:
            A (4, 2) float32 array in the order above.
        """
        # Accept plain lists as well as arrays.
        pts = np.asarray(pts, dtype="float32")
        rect = np.zeros((4, 2), dtype="float32")

        # x + y is smallest at the top-left and largest at the bottom-right.
        s = pts.sum(axis=1)
        rect[0] = pts[np.argmin(s)]
        rect[2] = pts[np.argmax(s)]

        # y - x is smallest at the top-right (large x, small y) and largest
        # at the bottom-left (small x, large y).
        diff = np.diff(pts, axis=1)
        rect[1] = pts[np.argmin(diff)]
        rect[3] = pts[np.argmax(diff)]

        return rect

    def four_point_transform(self, image, pts):
        """Warp the quadrilateral `pts` of `image` onto an upright rectangle."""
        rect = self.order_points(pts)
        (tl, tr, br, bl) = rect

        # Target width: the longer of the bottom and top edges. The floor of
        # 1 keeps the output size valid for a degenerate quadrilateral.
        widthA = np.linalg.norm(br - bl)
        widthB = np.linalg.norm(tr - tl)
        maxWidth = max(int(widthA), int(widthB), 1)

        # Target height: the longer of the right and left edges.
        heightA = np.linalg.norm(tr - br)
        heightB = np.linalg.norm(tl - bl)
        maxHeight = max(int(heightA), int(heightB), 1)

        # Destination corners in the same order as `rect`.
        dst = np.array([
            [0, 0],
            [maxWidth - 1, 0],
            [maxWidth - 1, maxHeight - 1],
            [0, maxHeight - 1]
        ], dtype="float32")

        M = cv2.getPerspectiveTransform(rect, dst)
        warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))

        return warped

    def scan_document(self, image_path):
        """Run the whole pipeline on one image file.

        Returns:
            dict with 'original', 'with_contour', 'edged', 'warped' and
            'scanned' images, or None when the file cannot be read or no
            document outline is found.
        """
        self.original = cv2.imread(image_path)
        if self.original is None:
            print("无法读取图像")
            return None

        # Copy used for drawing so the original stays clean.
        img_copy = self.original.copy()

        edged = self.preprocess_image(self.original)

        contour = self.find_document_contour(edged, self.original)
        if contour is None:
            print("未能检测到文档轮廓")
            return None

        cv2.drawContours(img_copy, [contour], -1, (0, 255, 0), 2)

        warped = self.four_point_transform(self.original, contour.reshape(4, 2))

        # Post-process: Otsu binarisation of the warped page.
        gray_warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(gray_warped, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        # Record the final result on the instance (declared in __init__).
        self.processed = thresh

        return {
            'original': self.original,
            'with_contour': img_copy,
            'edged': edged,
            'warped': warped,
            'scanned': thresh
        }

    def batch_scan(self, image_paths, output_dir):
        """Scan several images and save each result as 'scanned_<i>.jpg'.

        Args:
            image_paths: iterable of input file paths.
            output_dir: destination directory; created if missing.

        Returns:
            List of output paths that were actually written.
        """
        import os

        # cv2.imwrite does not create missing directories; it just returns
        # False, so the directory is created up front.
        os.makedirs(output_dir, exist_ok=True)

        results = []
        for i, path in enumerate(image_paths):
            print(f"正在处理第 {i+1} 张图片...")
            result = self.scan_document(path)
            if not result:
                print(f"处理失败: {path}")
                continue

            output_path = os.path.join(output_dir, f'scanned_{i}.jpg')
            # Report success only if the file was really written.
            if not cv2.imwrite(output_path, result['scanned']):
                print(f"处理失败: {path}")
                continue

            results.append(output_path)
            print(f"已保存: {output_path}")

        return results
# 使用示例
def document_scanner_demo():
    """Scan 'document_photo.jpg', plot the stages and save the result.

    Nothing is plotted or saved when the scan fails.
    """
    # Imported locally because the surrounding snippet only imports cv2 and
    # numpy; without it every `plt` reference raises NameError.
    import matplotlib.pyplot as plt

    scanner = DocumentScanner()

    # Single-image scan.
    results = scanner.scan_document('document_photo.jpg')
    if not results:
        return

    plt.figure(figsize=(15, 5))

    plt.subplot(131)
    plt.imshow(cv2.cvtColor(results['original'], cv2.COLOR_BGR2RGB))
    plt.title('原始图像')
    plt.axis('off')

    plt.subplot(132)
    plt.imshow(results['edged'], cmap='gray')
    plt.title('边缘检测')
    plt.axis('off')

    plt.subplot(133)
    plt.imshow(results['scanned'], cmap='gray')
    plt.title('扫描结果')
    plt.axis('off')

    plt.show()

    # Save the scanned page.
    cv2.imwrite('scanned_document.jpg', results['scanned'])

    # Batch scan:
    # scanner.batch_scan(['doc1.jpg', 'doc2.jpg'], './scanned_docs/')
场景二:运动检测与安防监控
python
import cv2
import numpy as np
import time
from datetime import datetime
class MotionDetector:
    """Frame-differencing motion detector with alert snapshots and recording.

    Attributes:
        threshold: per-pixel difference needed to count as motion.
        min_area: minimum contour area for a motion region.
        first_frame: blurred grayscale background frame, or None.
        motion_history: list of (datetime, total_area) samples.
        recording: True while a video file is being written.
        video_writer: the active cv2.VideoWriter, or None.
    """

    MAX_HISTORY = 10_000    # cap on stored samples so long sessions stay bounded
    DEFAULT_FPS = 20        # used when the camera reports no usable frame rate
    IDLE_STOP_SECONDS = 10  # idle time after which auto-recording stops

    def __init__(self, threshold=25, min_area=500):
        self.threshold = threshold
        self.min_area = min_area
        self.first_frame = None
        self.motion_history = []
        self.recording = False
        self.video_writer = None

    def detect_motion(self, frame):
        """Compare `frame` with the stored background frame.

        Returns:
            (motion_regions, total_area): bounding boxes (x, y, w, h) of the
            moving regions and the sum of their box areas. The first call
            after a background reset only stores the background and returns
            (None, 0).
        """
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (21, 21), 0)

        # The first frame seen becomes the background.
        if self.first_frame is None:
            self.first_frame = gray
            return None, 0

        frame_delta = cv2.absdiff(self.first_frame, gray)
        thresh = cv2.threshold(frame_delta, self.threshold, 255, cv2.THRESH_BINARY)[1]

        # Dilate to fill holes inside moving blobs.
        thresh = cv2.dilate(thresh, None, iterations=2)

        contours, _ = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        motion_regions = []
        total_area = 0

        for contour in contours:
            # Ignore small contours (noise).
            if cv2.contourArea(contour) < self.min_area:
                continue

            (x, y, w, h) = cv2.boundingRect(contour)
            motion_regions.append((x, y, w, h))
            total_area += w * h

        return motion_regions, total_area

    def _record_history(self, timestamp, total_area):
        """Append one sample and trim the list once it grows too large."""
        self.motion_history.append((timestamp, total_area))
        # Trim in batches so the cost of shifting the list is paid rarely.
        if len(self.motion_history) > 2 * self.MAX_HISTORY:
            del self.motion_history[:-self.MAX_HISTORY]

    def run_surveillance(self, camera_id=0, save_video=True):
        """Run the interactive surveillance loop.

        Keys: 'q' quits, 'r' toggles recording, 'b' resets the background.

        Args:
            camera_id: index passed to cv2.VideoCapture.
            save_video: when True, recording starts automatically on motion
                and stops after IDLE_STOP_SECONDS without motion.
        """
        cap = cv2.VideoCapture(camera_id)

        if not cap.isOpened():
            print("无法打开摄像头")
            return

        fps = int(cap.get(cv2.CAP_PROP_FPS))
        if fps <= 0:
            # Fall back to a default when the camera reports no usable rate,
            # so the video writer is not created with a frame rate of 0.
            fps = self.DEFAULT_FPS
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        print(f"监控系统启动 - 分辨率: {width}x{height}, FPS: {fps}")
        print("按 'q' 退出,按 'r' 开始/停止录像,按 'b' 重置背景")

        motion_count = 0
        alert_cooldown = 0
        auto_recording = False  # True only when recording was started by motion
        last_motion_time = 0.0  # time of the latest motion while auto-recording

        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # Mirror the view.
                frame = cv2.flip(frame, 1)

                motion_regions, total_area = self.detect_motion(frame)

                now = datetime.now()
                if motion_regions is not None:
                    # Feed the history that analyze_motion_history() plots.
                    self._record_history(now, total_area)

                cv2.putText(frame, now.strftime("%Y-%m-%d %H:%M:%S"), (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

                if motion_regions:
                    motion_count += 1

                    for (x, y, w, h) in motion_regions:
                        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 0, 255), 2)

                    cv2.putText(frame, f"Motion: {len(motion_regions)} areas", (10, 60),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

                    # Trigger an alert, at most once every 30 frames.
                    if alert_cooldown <= 0:
                        self.trigger_alert(frame)
                        alert_cooldown = 30

                    if save_video:
                        if not self.recording:
                            self.start_recording(width, height, fps)
                            auto_recording = self.recording
                        if auto_recording:
                            # Refresh on every motion frame so the idle timer
                            # counts from the last motion, not from the
                            # moment recording started.
                            last_motion_time = time.time()
                else:
                    cv2.putText(frame, "No Motion", (10, 60),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

                    # Stop an automatic recording after enough idle time.
                    if (self.recording and auto_recording
                            and time.time() - last_motion_time > self.IDLE_STOP_SECONDS):
                        self.stop_recording()
                        auto_recording = False

                if alert_cooldown > 0:
                    alert_cooldown -= 1

                # Recording indicator.
                if self.recording:
                    cv2.putText(frame, "REC", (width - 80, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
                    cv2.circle(frame, (width - 20, 25), 8, (0, 0, 255), -1)

                cv2.imshow('Motion Surveillance System', frame)

                if self.recording and self.video_writer:
                    self.video_writer.write(frame)

                key = cv2.waitKey(1) & 0xFF
                if key == ord('q'):
                    break
                elif key == ord('r'):
                    if self.recording:
                        self.stop_recording()
                    else:
                        self.start_recording(width, height, fps)
                    # A manual toggle hands control to the user, so the
                    # idle auto-stop no longer applies.
                    auto_recording = False
                elif key == ord('b'):
                    # The next frame becomes the new background.
                    self.first_frame = None
                    print("背景已重置")
        finally:
            # Finalise the video file and release the camera on every exit path.
            if self.recording:
                self.stop_recording()
            cap.release()
            cv2.destroyAllWindows()

        print(f"监控结束 - 共检测到 {motion_count} 次运动")

    def trigger_alert(self, frame):
        """Save the current frame as a timestamped JPEG."""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"alert_{timestamp}.jpg"
        cv2.imwrite(filename, frame)
        print(f"警报触发,已保存图片: {filename}")
        # Hook for e-mail or push notifications.

    def start_recording(self, width, height, fps):
        """Start writing a timestamped XVID .avi file; no-op if already recording."""
        if self.recording:
            return

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"recording_{timestamp}.avi"
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        writer = cv2.VideoWriter(filename, fourcc, fps, (width, height))
        if not writer.isOpened():
            # Do not claim to be recording when the file could not be opened.
            writer.release()
            print(f"无法创建录像文件: {filename}")
            return

        self.video_writer = writer
        self.recording = True
        print(f"开始录像: {filename}")

    def stop_recording(self):
        """Release the writer, if any, and clear the recording flag."""
        if self.video_writer:
            self.video_writer.release()
            self.video_writer = None
        self.recording = False
        print("录像已停止")

    def analyze_motion_history(self):
        """Plot the recorded motion area over time, if any samples exist."""
        if not self.motion_history:
            print("无历史数据")
            return

        import matplotlib.pyplot as plt

        timestamps = [record[0] for record in self.motion_history]
        areas = [record[1] for record in self.motion_history]

        plt.figure(figsize=(12, 4))
        plt.plot(timestamps, areas)
        plt.title('运动强度随时间变化')
        plt.xlabel('时间')
        plt.ylabel('运动面积')
        plt.grid(True)
        plt.show()
# 使用示例
def motion_detector_demo():
    """Run the surveillance loop on camera 0, then plot the motion history."""
    settings = {'threshold': 25, 'min_area': 1000}
    detector = MotionDetector(**settings)

    # Blocks until the surveillance loop ends.
    detector.run_surveillance(camera_id=0, save_video=True)

    # Analyse the history collected by the detector.
    detector.analyze_motion_history()
场景三:颜色检测与物体追踪
python
import cv2
import numpy as np
class ColorTracker:
    """Colour-based object tracker working in HSV space.

    Attributes:
        color_ranges: colour name -> list of (lower, upper) HSV bounds.
        selected_color: name of the colour currently tracked.
        tracking: True while tracking is switched on.
        tracked_object: the latest detection dict, or None.
        trail: recent centre points of the tracked object.
    """

    MAX_TRAIL_POINTS = 50  # number of centre points kept for the trail

    def __init__(self):
        # HSV bounds for common colours. Red needs two ranges because its
        # hue sits at both ends of the hue axis.
        self.color_ranges = {
            'red': [
                (np.array([0, 100, 100]), np.array([10, 255, 255])),
                (np.array([160, 100, 100]), np.array([179, 255, 255]))
            ],
            'green': [(np.array([40, 40, 40]), np.array([80, 255, 255]))],
            'blue': [(np.array([100, 100, 100]), np.array([130, 255, 255]))],
            'yellow': [(np.array([20, 100, 100]), np.array([35, 255, 255]))],
            'orange': [(np.array([10, 100, 100]), np.array([20, 255, 255]))],
            'purple': [(np.array([130, 100, 100]), np.array([160, 255, 255]))],
            'white': [(np.array([0, 0, 200]), np.array([180, 30, 255]))],
            'black': [(np.array([0, 0, 0]), np.array([180, 255, 30]))]
        }

        self.selected_color = 'blue'  # tracked by default
        self.tracking = False
        self.tracked_object = None
        self.trail = []

    def detect_color(self, frame, color_name):
        """Find the largest region of `color_name` in a BGR frame.

        Returns:
            dict with 'contour', 'bbox' (x, y, w, h), 'center', 'area'
            (bounding-box area) and 'mask', or None when the colour is
            unknown or no matching region is found.
        """
        # Validate the colour name before doing any image work.
        ranges = self.color_ranges.get(color_name, [])
        if not ranges:
            return None

        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

        # Union of the masks of all ranges defined for this colour.
        mask = np.zeros(hsv.shape[:2], dtype=np.uint8)
        for lower, upper in ranges:
            mask = cv2.bitwise_or(mask, cv2.inRange(hsv, lower, upper))

        # Erode then dilate to remove small noise.
        mask = cv2.erode(mask, None, iterations=2)
        mask = cv2.dilate(mask, None, iterations=2)

        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if not contours:
            return None

        largest_contour = max(contours, key=cv2.contourArea)

        x, y, w, h = cv2.boundingRect(largest_contour)
        center = (x + w // 2, y + h // 2)

        return {
            'contour': largest_contour,
            'bbox': (x, y, w, h),
            'center': center,
            'area': w * h,
            'mask': mask
        }

    def track_object(self, frame):
        """Annotate `frame` with the tracked object and its trail.

        Returns `frame` itself (annotated in place); it is returned untouched
        when tracking is off.
        """
        if not self.tracking:
            return frame

        result = self.detect_color(frame, self.selected_color)

        if not result:
            cv2.putText(frame, f"No {self.selected_color} object found", (10, 60),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
            return frame

        x, y, w, h = result['bbox']
        center = result['center']

        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
        cv2.circle(frame, center, 5, (0, 0, 255), -1)
        cv2.drawContours(frame, [result['contour']], -1, (255, 0, 0), 2)

        label = f"{self.selected_color} ({w}x{h})"
        cv2.putText(frame, label, (x, y-10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        # Keep only the most recent points.
        self.trail.append(center)
        if len(self.trail) > self.MAX_TRAIL_POINTS:
            self.trail.pop(0)

        # Connect consecutive trail points.
        for start, end in zip(self.trail, self.trail[1:]):
            cv2.line(frame, start, end, (0, 255, 255), 2)

        self.tracked_object = result
        return frame

    def create_color_picker(self, frame):
        """Overlay the colour list and the key-binding help on `frame`."""
        y_offset = 100
        for i, color in enumerate(self.color_ranges):
            # Highlight the selected colour.
            if color == self.selected_color:
                cv2.putText(frame, f"> {color}", (10, y_offset + i*25),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
            else:
                cv2.putText(frame, f" {color}", (10, y_offset + i*25),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 1)

        instructions = [
            "Commands:",
            "1-8: Select color",
            "t: Start/Stop tracking",
            "c: Clear trail",
            "q: Quit"
        ]

        for i, instr in enumerate(instructions):
            cv2.putText(frame, instr, (frame.shape[1] - 300, 30 + i*25),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (200, 200, 200), 1)

    def run(self):
        """Run the interactive tracker on the default camera.

        Keys: 1-8 select a colour, 't' toggles tracking, 'c' clears the
        trail, 'q' quits.
        """
        cap = cv2.VideoCapture(0)
        if not cap.isOpened():
            print("无法打开摄像头")
            return

        print("颜色追踪系统启动")
        print("可用颜色: " + ", ".join(self.color_ranges.keys()))
        print("按键1-8选择对应颜色,t键开始/停止追踪,c键清除轨迹,q键退出")

        colors = list(self.color_ranges)

        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                frame = cv2.flip(frame, 1)

                if self.tracking:
                    frame = self.track_object(frame)

                self.create_color_picker(frame)

                status = f"Tracking: {'ON' if self.tracking else 'OFF'} - Color: {self.selected_color}"
                cv2.putText(frame, status, (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2)

                cv2.imshow('Color Tracker', frame)

                key = cv2.waitKey(1) & 0xFF
                if key == ord('q'):
                    break
                elif key == ord('t'):
                    self.tracking = not self.tracking
                    if not self.tracking:
                        self.trail = []
                elif key == ord('c'):
                    self.trail = []
                elif ord('1') <= key <= ord('8'):
                    idx = key - ord('1')
                    if idx < len(colors):
                        if colors[idx] != self.selected_color:
                            # The old trail belongs to another object;
                            # keeping it would draw a line between the two.
                            self.trail = []
                        self.selected_color = colors[idx]
                        print(f"切换到颜色: {self.selected_color}")
        finally:
            # Release the camera and windows on every exit path.
            cap.release()
            cv2.destroyAllWindows()
# 使用示例
def color_tracker_demo():
    """Create a ColorTracker and start its interactive loop."""
    ColorTracker().run()
六、结尾与互动
OpenCV-Python无疑是计算机视觉领域的一颗璀璨明珠,它用惊艳的能力将复杂的视觉算法变得触手可及。从本文的介绍中,我们不仅学习了图像的读写显示、视频处理、几何变换等基础操作,更深入探索了边缘检测、人脸识别、图像滤波等高级技术,并通过文档扫描、运动监控、颜色追踪三个完整的实战项目,展示了OpenCV-Python在实际生活中的强大应用价值。
计算机视觉正在以前所未有的速度改变着我们的世界,而OpenCV作为这一领域的开源基石,为无数创新应用提供了坚实的基础。无论是初学者想要踏入计算机视觉的大门,还是专业开发者需要快速实现视觉功能,OpenCV-Python都是一个惊艳且强大的选择。更重要的是,它与深度学习框架的无缝集成,让开发者能够构建从传统图像处理到现代AI视觉的完整解决方案。
现在,轮到你动手尝试了!你是否想过用OpenCV实现一个有趣的创意?是智能家居中的人脸识别门禁,还是运动健身的姿势分析,或者是宠物自动投喂器中的动物检测?欢迎在评论区分享你的想法和项目经验。如果你在使用OpenCV时遇到过难题,或者有独到的优化技巧,也请不吝分享,让我们共同推动计算机视觉技术的发展!期待看到你的精彩作品!
更多推荐
所有评论(0)