OpenCV:從像素到智慧視覺系統的全面指南
本文全面介绍了OpenCV计算机视觉库,从基础到高级应用共分为四个部分:基础入门、进阶技巧、高级应用与实战、性能优化与最佳实践。第一部分讲解了OpenCV的环境配置、图像处理基础(色彩空间转换、阈值处理等)和基本操作(缩放、旋转等)。第二部分深入图像滤波、形态学操作、特征检测和轮廓分析等进阶内容。第三部分展示了物体检测与追踪、深度学习集成、实时图像处理等高级应用,并提供了智能停车场系统和增强现实两个综合实战案例。第四部分总结了性能优化技巧与工程最佳实践。
OpenCV:從像素到智慧視覺系統的全面指南
第一部分:OpenCV入門基礎
第1章 OpenCV簡介與發展歷程
1.1 什麼是OpenCV?
OpenCV(Open Source Computer Vision Library)是一個開源的電腦視覺和機器學習軟體庫,由Intel於1999年首次發布。這個跨平台庫包含了超過2500個優化演算法,涵蓋了電腦視覺的各個領域:人臉識別、物件檢測、影像分割、運動追蹤、增強現實等。如今,OpenCV已成為學術界和工業界最廣泛使用的電腦視覺庫之一,每天有數百萬次下載,被應用於從手機App到自動駕駛系統的各個領域。
1.2 OpenCV的核心優勢
- 開源免費:BSD許可證允許商業和研究使用
- 跨平台:支援Windows、Linux、macOS、iOS、Android
- 多語言綁定:原生C++,並有Python、Java、C#等接口
- 高效能:底層使用C/C++編寫,並針對Intel處理器優化
- 豐富的功能:從基礎影像處理到深度學習模型部署
- 活躍的社群:龐大的開發者社群和豐富的文檔資源
1.3 OpenCV的發展歷程
- 1999年:Intel研究院啟動了OpenCV項目
- 2000年:發布第一個alpha版本
- 2006年:OpenCV 1.0發布
- 2009年:OpenCV 2.0發布,引入了C++接口
- 2015年:OpenCV 3.0發布,模組化設計
- 2018年:OpenCV 4.0發布,專注於深度學習和性能優化
- 2020年至今:OpenCV持續演進,不斷加入新演算法和優化
第2章 OpenCV環境配置
2.1 Python環境安裝
python
# 使用pip安裝OpenCV(完整版)
pip install opencv-python # 基礎模組
pip install opencv-contrib-python # 包含額外模組
# 驗證安裝
import cv2
print(f"OpenCV版本: {cv2.__version__}")
# 檢查是否安裝正確
import numpy as np
print(f"NumPy版本: {np.__version__}")
2.2 C++環境配置(Linux)
bash
# Ubuntu/Debian系統安裝
sudo apt update
sudo apt install build-essential cmake git
sudo apt install libopencv-dev python3-opencv
# 編譯安裝最新版本
git clone https://github.com/opencv/opencv.git
git clone https://github.com/opencv/opencv_contrib.git
cd opencv
mkdir build && cd build
cmake -D CMAKE_BUILD_TYPE=RELEASE \
-D CMAKE_INSTALL_PREFIX=/usr/local \
-D OPENCV_EXTRA_MODULES_PATH=../../opencv_contrib/modules \
-D WITH_CUDA=OFF \
-D ENABLE_CXX11=ON \
-D BUILD_EXAMPLES=ON ..
make -j$(nproc)
sudo make install
2.3 基本影像讀取與顯示
python
import cv2
import numpy as np
import matplotlib.pyplot as plt
# 方法1:使用OpenCV讀取影像
img = cv2.imread('image.jpg') # 讀取影像,BGR格式
# 方法2:使用Matplotlib顯示(需要轉換顏色空間)
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img_rgb)
plt.axis('off')
plt.show()
# 方法3:使用OpenCV顯示
cv2.imshow('Image', img)
cv2.waitKey(0) # 等待按鍵
cv2.destroyAllWindows()
# 影像屬性
print(f"影像形狀: {img.shape}") # (高度, 寬度, 通道數)
print(f"影像大小: {img.size}") # 像素總數
print(f"資料類型: {img.dtype}") # 資料類型
第3章 影像處理基礎
3.1 色彩空間轉換
python
# 色彩空間轉換示例
img = cv2.imread('image.jpg')
# BGR轉灰度圖
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# BGR轉HSV(色調、飽和度、明度)
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# BGR轉LAB(亮度、綠色-紅色、藍色-黃色)
lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
# BGR轉YCrCb(亮度、紅色色差、藍色色差)
ycrb = cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb)
# 分離和合併通道
b, g, r = cv2.split(img)
merged = cv2.merge([b, g, r])
# 顯示各種色彩空間
plt.figure(figsize=(15, 8))
plt.subplot(2, 3, 1), plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)), plt.title('原始影像')
plt.subplot(2, 3, 2), plt.imshow(gray, cmap='gray'), plt.title('灰度圖')
plt.subplot(2, 3, 3), plt.imshow(cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)), plt.title('HSV')
plt.subplot(2, 3, 4), plt.imshow(cv2.cvtColor(lab, cv2.COLOR_LAB2RGB)), plt.title('LAB')
plt.subplot(2, 3, 5), plt.imshow(cv2.cvtColor(ycrb, cv2.COLOR_YCrCb2RGB)), plt.title('YCrCb')
plt.subplot(2, 3, 6), plt.imshow(cv2.cvtColor(merged, cv2.COLOR_BGR2RGB)), plt.title('合併通道')
plt.tight_layout()
plt.show()
3.2 影像基本操作
python
# 影像幾何變換
# 1. 縮放
height, width = img.shape[:2]
resized = cv2.resize(img, (width//2, height//2)) # 縮小一半
# 保持長寬比的縮放
scale_percent = 50 # 縮放百分比
new_width = int(width * scale_percent / 100)
new_height = int(height * scale_percent / 100)
dim = (new_width, new_height)
resized_proportional = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
# 2. 旋轉
(h, w) = img.shape[:2]
center = (w // 2, h // 2)
M = cv2.getRotationMatrix2D(center, 45, 1.0) # 旋轉45度,縮放1.0
rotated = cv2.warpAffine(img, M, (w, h))
# 3. 平移
M = np.float32([[1, 0, 100], [0, 1, 50]]) # x方向平移100,y方向平移50
translated = cv2.warpAffine(img, M, (w, h))
# 4. 仿射變換
pts1 = np.float32([[50, 50], [200, 50], [50, 200]])
pts2 = np.float32([[10, 100], [200, 50], [100, 250]])
M_affine = cv2.getAffineTransform(pts1, pts2)
affine = cv2.warpAffine(img, M_affine, (w, h))
# 5. 透視變換
pts1 = np.float32([[56, 65], [368, 52], [28, 387], [389, 390]])
pts2 = np.float32([[0, 0], [300, 0], [0, 300], [300, 300]])
M_perspective = cv2.getPerspectiveTransform(pts1, pts2)
perspective = cv2.warpPerspective(img, M_perspective, (300, 300))
# 顯示所有變換結果
transformations = [
("原始影像", img),
("縮放", resized_proportional),
("旋轉45度", rotated),
("平移", translated),
("仿射變換", affine),
("透視變換", perspective)
]
plt.figure(figsize=(15, 10))
for i, (title, img_t) in enumerate(transformations, 1):
plt.subplot(2, 3, i)
plt.imshow(cv2.cvtColor(img_t, cv2.COLOR_BGR2RGB))
plt.title(title)
plt.axis('off')
plt.tight_layout()
plt.show()
3.3 影像閾值處理
python
# 讀取灰度影像
gray_img = cv2.imread('image.jpg', cv2.IMREAD_GRAYSCALE)
# 1. 簡單閾值處理
ret, thresh1 = cv2.threshold(gray_img, 127, 255, cv2.THRESH_BINARY)
ret, thresh2 = cv2.threshold(gray_img, 127, 255, cv2.THRESH_BINARY_INV)
ret, thresh3 = cv2.threshold(gray_img, 127, 255, cv2.THRESH_TRUNC)
ret, thresh4 = cv2.threshold(gray_img, 127, 255, cv2.THRESH_TOZERO)
ret, thresh5 = cv2.threshold(gray_img, 127, 255, cv2.THRESH_TOZERO_INV)
# 2. 自適應閾值處理
thresh6 = cv2.adaptiveThreshold(gray_img, 255,
cv2.ADAPTIVE_THRESH_MEAN_C,
cv2.THRESH_BINARY, 11, 2)
thresh7 = cv2.adaptiveThreshold(gray_img, 255,
cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY, 11, 2)
# 3. Otsu閾值處理(自動尋找最佳閾值)
ret, thresh8 = cv2.threshold(gray_img, 0, 255,
cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# 顯示閾值處理結果
titles = ['Original', 'BINARY', 'BINARY_INV', 'TRUNC',
'TOZERO', 'TOZERO_INV', 'ADAPTIVE_MEAN',
'ADAPTIVE_GAUSSIAN', "OTSU"]
images = [gray_img, thresh1, thresh2, thresh3, thresh4,
thresh5, thresh6, thresh7, thresh8]
plt.figure(figsize=(15, 10))
for i in range(9):
plt.subplot(3, 3, i+1)
plt.imshow(images[i], 'gray')
plt.title(titles[i])
plt.axis('off')
plt.tight_layout()
plt.show()
第二部分:OpenCV進階技巧
第4章 影像濾波與增強
4.1 卷積與濾波器
python
# 自定義卷積核
img = cv2.imread('image.jpg')
# 創建自定義卷積核
kernel_identity = np.array([[0, 0, 0],
[0, 1, 0],
[0, 0, 0]])
kernel_sharpen = np.array([[0, -1, 0],
[-1, 5, -1],
[0, -1, 0]])
kernel_blur = np.ones((5, 5), np.float32) / 25
kernel_edge_detect = np.array([[-1, -1, -1],
[-1, 8, -1],
[-1, -1, -1]])
# 應用卷積
filtered_identity = cv2.filter2D(img, -1, kernel_identity)
filtered_sharpen = cv2.filter2D(img, -1, kernel_sharpen)
filtered_blur = cv2.filter2D(img, -1, kernel_blur)
filtered_edge = cv2.filter2D(img, -1, kernel_edge_detect)
# 高斯濾波器
gaussian_blur = cv2.GaussianBlur(img, (5, 5), 0)
# 中值濾波器(去除椒鹽噪聲)
# 首先添加噪聲
def add_salt_pepper_noise(image, prob):
    """Add salt-and-pepper (impulse) noise to an image.

    Args:
        image: Input image, shape (H, W) or (H, W, C), typically uint8.
        prob: Fraction of *pixels* to corrupt in total; half become
            salt (255) and half become pepper (0).

    Returns:
        A noisy copy of ``image``; the input array is not modified.
    """
    output = np.copy(image)
    height, width = image.shape[:2]
    # Noise budget is based on the number of pixels, not raw array
    # elements: the original used image.size, which counts every colour
    # channel and so tripled the intended density on BGR images.
    count = int(np.ceil(prob * height * width * 0.5))
    # Salt: random coordinates set to white. np.random.randint's upper
    # bound is exclusive, so (0, height) covers every row including the
    # last one (the original's `i - 1` could never hit the last row/col).
    rows = np.random.randint(0, height, count)
    cols = np.random.randint(0, width, count)
    output[rows, cols] = 255  # broadcasts over channels if present
    # Pepper: independent random coordinates set to black.
    rows = np.random.randint(0, height, count)
    cols = np.random.randint(0, width, count)
    output[rows, cols] = 0
    return output
noisy_img = add_salt_pepper_noise(img, 0.01)
median_blur = cv2.medianBlur(noisy_img, 5)
# 雙邊濾波器(保邊濾波)
bilateral_filter = cv2.bilateralFilter(img, 9, 75, 75)
# 顯示濾波結果
filters = [
("原始影像", img),
("銳化", filtered_sharpen),
("均值模糊", filtered_blur),
("邊緣檢測", filtered_edge),
("高斯模糊", gaussian_blur),
("椒鹽噪聲", noisy_img),
("中值濾波", median_blur),
("雙邊濾波", bilateral_filter)
]
plt.figure(figsize=(15, 10))
for i, (title, img_f) in enumerate(filters, 1):
plt.subplot(2, 4, i)
plt.imshow(cv2.cvtColor(img_f, cv2.COLOR_BGR2RGB))
plt.title(title)
plt.axis('off')
plt.tight_layout()
plt.show()
4.2 形態學操作
python
# 形態學操作示例
img_gray = cv2.imread('text_image.jpg', cv2.IMREAD_GRAYSCALE)
_, binary_img = cv2.threshold(img_gray, 127, 255, cv2.THRESH_BINARY)
# 定義結構元素
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
kernel_ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
kernel_cross = cv2.getStructuringElement(cv2.MORPH_CROSS, (5, 5))
# 腐蝕(消除邊界點,使影像縮小)
erosion_rect = cv2.erode(binary_img, kernel, iterations=1)
erosion_ellipse = cv2.erode(binary_img, kernel_ellipse, iterations=1)
erosion_cross = cv2.erode(binary_img, kernel_cross, iterations=1)
# 膨脹(將邊界向外部擴張,使影像擴大)
dilation_rect = cv2.dilate(binary_img, kernel, iterations=1)
dilation_ellipse = cv2.dilate(binary_img, kernel_ellipse, iterations=1)
dilation_cross = cv2.dilate(binary_img, kernel_cross, iterations=1)
# 開運算(先腐蝕後膨脹,去除小物件)
opening = cv2.morphologyEx(binary_img, cv2.MORPH_OPEN, kernel)
# 閉運算(先膨脹後腐蝕,填充小空洞)
closing = cv2.morphologyEx(binary_img, cv2.MORPH_CLOSE, kernel)
# 形態學梯度(膨脹-腐蝕,獲取物體邊緣)
gradient = cv2.morphologyEx(binary_img, cv2.MORPH_GRADIENT, kernel)
# 頂帽運算(原始影像-開運算,獲取小細節)
tophat = cv2.morphologyEx(binary_img, cv2.MORPH_TOPHAT, kernel)
# 黑帽運算(閉運算-原始影像,獲取暗部細節)
blackhat = cv2.morphologyEx(binary_img, cv2.MORPH_BLACKHAT, kernel)
# 顯示形態學操作結果
morph_ops = [
("原始二值影像", binary_img),
("腐蝕(RECT)", erosion_rect),
("腐蝕(ELLIPSE)", erosion_ellipse),
("腐蝕(CROSS)", erosion_cross),
("膨脹(RECT)", dilation_rect),
("膨脹(ELLIPSE)", dilation_ellipse),
("膨脹(CROSS)", dilation_cross),
("開運算", opening),
("閉運算", closing),
("形態學梯度", gradient),
("頂帽運算", tophat),
("黑帽運算", blackhat)
]
plt.figure(figsize=(15, 12))
for i, (title, img_m) in enumerate(morph_ops, 1):
plt.subplot(4, 3, i)
plt.imshow(img_m, cmap='gray')
plt.title(title)
plt.axis('off')
plt.tight_layout()
plt.show()
4.3 影像金字塔
python
# 影像金字塔
img = cv2.imread('image.jpg')
# 高斯金字塔
layer = img.copy()
gaussian_pyramid = [layer]
for i in range(6):
layer = cv2.pyrDown(layer) # 下採樣
gaussian_pyramid.append(layer)
# 拉普拉斯金字塔
layer = gaussian_pyramid[5]
laplacian_pyramid = [layer]
for i in range(5, 0, -1):
size = (gaussian_pyramid[i-1].shape[1], gaussian_pyramid[i-1].shape[0])
gaussian_expanded = cv2.pyrUp(gaussian_pyramid[i], dstsize=size)
laplacian = cv2.subtract(gaussian_pyramid[i-1], gaussian_expanded)
laplacian_pyramid.append(laplacian)
# 影像混合(使用金字塔)
def blend_images(img1, img2, mask):
    """Blend two images with a float mask using Laplacian pyramids.

    Args:
        img1: First image (used where ``mask`` is 1.0).
        img2: Second image (used where ``mask`` is 0.0).
        mask: Float mask in [0, 1] with the same spatial size as the images.

    Returns:
        The blended image as a float array (cast to uint8 for display).
    """
    # --- Gaussian pyramids for both images and the mask (7 levels) ---
    g1, g2, gm = img1.copy(), img2.copy(), mask.copy()
    gp1, gp2, gpM = [g1], [g2], [gm]
    for _ in range(6):
        g1 = cv2.pyrDown(g1)
        g2 = cv2.pyrDown(g2)
        gm = cv2.pyrDown(gm)
        gp1.append(g1)
        gp2.append(g2)
        gpM.append(gm)
    # --- Laplacian pyramids, coarsest level first (6 levels) ---
    lp1, lp2 = [gp1[5]], [gp2[5]]
    for i in range(5, 0, -1):
        size = (gp1[i - 1].shape[1], gp1[i - 1].shape[0])
        lp1.append(cv2.subtract(gp1[i - 1], cv2.pyrUp(gp1[i], dstsize=size)))
        lp2.append(cv2.subtract(gp2[i - 1], cv2.pyrUp(gp2[i], dstsize=size)))
    # --- Blend each Laplacian level with the matching mask level ---
    # lp*[k] has the size of gp*[5 - k], so it must be paired with
    # gpM[5 - k]. The original iterated reversed(gpM) over all 7 mask
    # levels, pairing each Laplacian level with a mask one pyramid step
    # too small and raising a broadcasting error; reversed(gpM[:6])
    # aligns the sizes correctly.
    blended_pyramid = []
    for l1, l2, m in zip(lp1, lp2, reversed(gpM[:6])):
        m = m[:, :, np.newaxis] if len(m.shape) == 2 else m
        blended_pyramid.append(l1 * m + l2 * (1.0 - m))
    # --- Collapse the blended pyramid back into a full-size image ---
    blended = blended_pyramid[0]
    for i in range(1, 6):
        size = (blended_pyramid[i].shape[1], blended_pyramid[i].shape[0])
        blended = cv2.pyrUp(blended, dstsize=size)
        blended = cv2.add(blended, blended_pyramid[i])
    return blended
# 創建兩個影像和遮罩
img1 = cv2.imread('image1.jpg')
img2 = cv2.imread('image2.jpg')
img1 = cv2.resize(img1, (500, 500))
img2 = cv2.resize(img2, (500, 500))
# 創建漸變遮罩
mask = np.zeros((500, 500), dtype=np.float32)
mask[:, :250] = 1.0 # 左邊為img1,右邊為img2
mask = cv2.GaussianBlur(mask, (51, 51), 0)
# 混合影像
blended = blend_images(img1, img2, mask)
# 顯示金字塔和混合結果
plt.figure(figsize=(15, 10))
plt.subplot(2, 4, 1)
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.title('原始影像')
plt.axis('off')
for i in range(3):
plt.subplot(2, 4, i+2)
plt.imshow(cv2.cvtColor(gaussian_pyramid[i], cv2.COLOR_BGR2RGB))
plt.title(f'高斯金字塔層 {i}')
plt.axis('off')
plt.subplot(2, 4, 5)
plt.imshow(cv2.cvtColor(img1, cv2.COLOR_BGR2RGB))
plt.title('影像1')
plt.axis('off')
plt.subplot(2, 4, 6)
plt.imshow(cv2.cvtColor(img2, cv2.COLOR_BGR2RGB))
plt.title('影像2')
plt.axis('off')
plt.subplot(2, 4, 7)
plt.imshow(mask, cmap='gray')
plt.title('混合遮罩')
plt.axis('off')
plt.subplot(2, 4, 8)
plt.imshow(cv2.cvtColor(blended.astype(np.uint8), cv2.COLOR_BGR2RGB))
plt.title('金字塔混合結果')
plt.axis('off')
plt.tight_layout()
plt.show()
第5章 影像特徵檢測與描述
5.1 邊緣檢測
python
# 邊緣檢測演算法比較
img_gray = cv2.imread('image.jpg', cv2.IMREAD_GRAYSCALE)
# 1. Sobel算子
sobel_x = cv2.Sobel(img_gray, cv2.CV_64F, 1, 0, ksize=5)
sobel_y = cv2.Sobel(img_gray, cv2.CV_64F, 0, 1, ksize=5)
sobel_combined = cv2.magnitude(sobel_x, sobel_y)
# 2. Scharr算子(對Sobel的優化)
scharr_x = cv2.Scharr(img_gray, cv2.CV_64F, 1, 0)
scharr_y = cv2.Scharr(img_gray, cv2.CV_64F, 0, 1)
scharr_combined = cv2.magnitude(scharr_x, scharr_y)
# 3. Laplacian算子
laplacian = cv2.Laplacian(img_gray, cv2.CV_64F)
# 4. Canny邊緣檢測(最常用)
canny_edges = cv2.Canny(img_gray, 100, 200) # 閾值可調整
# 自適應Canny邊緣檢測
def auto_canny(image, sigma=0.33):
    """Run Canny edge detection with thresholds derived from the image.

    The lower/upper hysteresis thresholds are placed at (1 - sigma) and
    (1 + sigma) times the median pixel intensity, clamped to [0, 255],
    so the detector adapts to overall image brightness.
    """
    median_intensity = np.median(image)
    lower_thresh = int(max(0, (1.0 - sigma) * median_intensity))
    upper_thresh = int(min(255, (1.0 + sigma) * median_intensity))
    return cv2.Canny(image, lower_thresh, upper_thresh)
auto_canny_edges = auto_canny(img_gray)
# 顯示各種邊緣檢測結果
edge_detectors = [
("原始灰度圖", img_gray),
("Sobel X", cv2.convertScaleAbs(sobel_x)),
("Sobel Y", cv2.convertScaleAbs(sobel_y)),
("Sobel Combined", cv2.convertScaleAbs(sobel_combined)),
("Scharr X", cv2.convertScaleAbs(scharr_x)),
("Scharr Y", cv2.convertScaleAbs(scharr_y)),
("Scharr Combined", cv2.convertScaleAbs(scharr_combined)),
("Laplacian", cv2.convertScaleAbs(laplacian)),
("Canny (100,200)", canny_edges),
("Auto Canny", auto_canny_edges)
]
plt.figure(figsize=(15, 12))
for i, (title, img_e) in enumerate(edge_detectors, 1):
plt.subplot(4, 3, i)
plt.imshow(img_e, cmap='gray')
plt.title(title)
plt.axis('off')
plt.tight_layout()
plt.show()
5.2 角點檢測
python
# Corner detection algorithms compared on the same image.
img = cv2.imread('chessboard.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# 1. Harris corner detection: a response map over the whole image.
gray_float = np.float32(gray)
harris_response = cv2.cornerHarris(gray_float, blockSize=2, ksize=3, k=0.04)
img_harris = img.copy()
# Mark pixels whose response exceeds 1% of the maximum in red.
img_harris[harris_response > 0.01 * harris_response.max()] = [0, 0, 255]

# 2. Shi-Tomasi ("good features to track"), an improved Harris score.
corners = cv2.goodFeaturesToTrack(gray, maxCorners=100,
                                  qualityLevel=0.01,
                                  minDistance=10)
# np.int0 was removed in NumPy 2.0; np.intp is the portable spelling.
corners = np.intp(corners)
img_shitomasi = img.copy()
for corner in corners:
    x, y = corner.ravel()
    cv2.circle(img_shitomasi, (x, y), 3, (0, 255, 0), -1)

# 3. FAST corner detector (intensity-comparison based, very fast).
fast = cv2.FastFeatureDetector_create()
fast.setNonmaxSuppression(True)
fast_keypoints = fast.detect(gray, None)
img_fast = cv2.drawKeypoints(img, fast_keypoints, None,
                             color=(255, 0, 0))

# 4. ORB: FAST keypoints + rotation-aware BRIEF descriptors.
orb = cv2.ORB_create(nfeatures=1000)
orb_keypoints, orb_descriptors = orb.detectAndCompute(gray, None)
img_orb = cv2.drawKeypoints(img, orb_keypoints, None,
                            color=(0, 255, 255),
                            flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

# 5. SIFT — only present in builds that include it (the patent expired
# in 2020, so recent opencv-python wheels ship it in the main module).
try:
    sift = cv2.SIFT_create()
    sift_keypoints, sift_descriptors = sift.detectAndCompute(gray, None)
    img_sift = cv2.drawKeypoints(img, sift_keypoints, None,
                                 color=(255, 0, 255),
                                 flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
except (cv2.error, AttributeError):  # narrowed from a bare except
    img_sift = img.copy()
    cv2.putText(img_sift, "SIFT not available", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

# Display every detector's result side by side.
corner_detectors = [
    ("Harris角點檢測", img_harris),
    ("Shi-Tomasi角點檢測", img_shitomasi),
    ("FAST角點檢測", img_fast),
    ("ORB特徵檢測", img_orb),
    ("SIFT特徵檢測", img_sift)
]
plt.figure(figsize=(15, 8))
for i, (title, img_c) in enumerate(corner_detectors, 1):
    plt.subplot(2, 3, i)
    plt.imshow(cv2.cvtColor(img_c, cv2.COLOR_BGR2RGB))
    plt.title(title)
    plt.axis('off')
plt.tight_layout()
plt.show()
5.3 特徵匹配
python
# 特徵匹配示例
img1 = cv2.imread('object.jpg', cv2.IMREAD_GRAYSCALE)
img2 = cv2.imread('scene.jpg', cv2.IMREAD_GRAYSCALE)
# 初始化特徵檢測器
orb = cv2.ORB_create(nfeatures=1000)
# 檢測關鍵點和描述符
kp1, des1 = orb.detectAndCompute(img1, None)
kp2, des2 = orb.detectAndCompute(img2, None)
# 暴力匹配器
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
# 匹配描述符
matches = bf.match(des1, des2)
# 按距離排序
matches = sorted(matches, key=lambda x: x.distance)
# 繪製最佳匹配
img_matches = cv2.drawMatches(img1, kp1, img2, kp2,
matches[:50], None,
flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
# FLANN匹配器(適合SIFT、SURF等)
try:
# 創建SIFT檢測器
sift = cv2.SIFT_create()
# 檢測關鍵點和描述符
kp1_sift, des1_sift = sift.detectAndCompute(img1, None)
kp2_sift, des2_sift = sift.detectAndCompute(img2, None)
# FLANN參數
FLANN_INDEX_KDTREE = 1
index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
search_params = dict(checks=50)
# 創建FLANN匹配器
flann = cv2.FlannBasedMatcher(index_params, search_params)
# 進行KNN匹配
matches_flann = flann.knnMatch(des1_sift, des2_sift, k=2)
# 應用Lowe's比率測試
good_matches = []
for m, n in matches_flann:
if m.distance < 0.7 * n.distance:
good_matches.append(m)
# 繪製匹配結果
img_flann_matches = cv2.drawMatches(img1, kp1_sift, img2, kp2_sift,
good_matches[:50], None,
flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
except:
img_flann_matches = np.zeros_like(img_matches)
# 顯示匹配結果
plt.figure(figsize=(15, 8))
plt.subplot(1, 3, 1)
plt.imshow(img1, cmap='gray')
plt.title('查詢影像')
plt.axis('off')
plt.subplot(1, 3, 2)
plt.imshow(img_matches, cmap='gray')
plt.title('ORB暴力匹配 (前50個匹配)')
plt.axis('off')
plt.subplot(1, 3, 3)
plt.imshow(img_flann_matches, cmap='gray')
plt.title('SIFT FLANN匹配 (Lowe\'s比率測試)')
plt.axis('off')
plt.tight_layout()
plt.show()
第6章 影像分割與輪廓分析
6.1 影像分割
python
# 影像分割方法
img = cv2.imread('objects.jpg')
# 1. 基於閾值的分割
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, thresh_binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
# 2. 基於邊緣的分割
edges = cv2.Canny(gray, 100, 200)
# 3. 基於區域的分割(分水嶺演算法)
def watershed_segmentation(image):
    """Segment touching objects with the watershed algorithm.

    Paints watershed boundary pixels red directly on ``image`` (the
    argument is modified in place) and returns it together with the
    marker label matrix produced by cv2.watershed.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Otsu picks the threshold automatically; INV makes objects white.
    _, thresh = cv2.threshold(gray, 0, 255,
                              cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    # Morphological opening removes small speckle noise.
    kernel = np.ones((3, 3), np.uint8)
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
    # Dilating the objects yields an area that is certainly background.
    sure_bg = cv2.dilate(opening, kernel, iterations=3)
    # Pixels far from any boundary are certainly foreground.
    dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
    _, sure_fg = cv2.threshold(dist_transform, 0.7 * dist_transform.max(),
                               255, 0)
    sure_fg = np.uint8(sure_fg)
    # Whatever is neither sure background nor sure foreground is unknown.
    unknown = cv2.subtract(sure_bg, sure_fg)
    # Label connected foreground regions; shift all labels by one so the
    # background becomes 1, then zero the unknown band for watershed to
    # resolve.
    _, markers = cv2.connectedComponents(sure_fg)
    markers = markers + 1
    markers[unknown == 255] = 0
    markers = cv2.watershed(image, markers)
    # Watershed marks boundary pixels with -1; colour them red (BGR).
    image[markers == -1] = [255, 0, 0]
    return image, markers
watershed_result, markers = watershed_segmentation(img.copy())
# 4. GrabCut分割(互動式分割)
def grabcut_segmentation(image, rect=None):
    """Extract the foreground inside ``rect`` with the GrabCut algorithm.

    When ``rect`` is None, a rectangle inset 50 px from every border is
    used as the initial foreground estimate. Returns the image with the
    background zeroed out.
    """
    mask = np.zeros(image.shape[:2], np.uint8)
    if rect is None:
        rect = (50, 50, image.shape[1] - 100, image.shape[0] - 100)
    # GrabCut's internal GMM state: 13 doubles x 5 components per model.
    bgd_model = np.zeros((1, 65), np.float64)
    fgd_model = np.zeros((1, 65), np.float64)
    cv2.grabCut(image, mask, rect, bgd_model, fgd_model,
                5, cv2.GC_INIT_WITH_RECT)
    # Collapse the 4-state mask: definite/probable background -> 0,
    # definite/probable foreground -> 1.
    fg_mask = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
    return image * fg_mask[:, :, np.newaxis]
grabcut_result = grabcut_segmentation(img.copy())
# 5. K-means色彩分割
def kmeans_segmentation(image, k=3):
    """Quantize an image into ``k`` dominant colours via K-means.

    Every pixel is replaced by the colour of its cluster centre,
    producing a posterized segmentation of the image.
    """
    # Flatten to an (N, 3) float32 sample matrix, as cv2.kmeans expects.
    samples = np.float32(image.reshape((-1, 3)))
    # Stop after 100 iterations or when centres move less than 0.2.
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.2)
    _, labels, centers = cv2.kmeans(samples, k, None,
                                    criteria, 10, cv2.KMEANS_RANDOM_CENTERS)
    # Map each pixel to its centre colour and restore the original shape.
    centers = np.uint8(centers)
    return centers[labels.flatten()].reshape(image.shape)
kmeans_result = kmeans_segmentation(img, k=4)
# 顯示分割結果
segmentation_methods = [
("原始影像", img),
("閾值分割", cv2.cvtColor(thresh_binary, cv2.COLOR_GRAY2BGR)),
("邊緣分割", cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)),
("分水嶺演算法", watershed_result),
("GrabCut分割", grabcut_result),
("K-means色彩分割", kmeans_result)
]
plt.figure(figsize=(15, 10))
for i, (title, img_s) in enumerate(segmentation_methods, 1):
plt.subplot(2, 3, i)
plt.imshow(cv2.cvtColor(img_s, cv2.COLOR_BGR2RGB))
plt.title(title)
plt.axis('off')
plt.tight_layout()
plt.show()
6.2 輪廓檢測與分析
python
# Contour detection and per-contour shape analysis.
img = cv2.imread('shapes.jpg')
img_copy = img.copy()
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Shapes are dark on a light background, so threshold with inversion.
_, binary = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY_INV)
# RETR_TREE keeps the full nesting hierarchy; CHAIN_APPROX_SIMPLE
# compresses straight segments down to their endpoints.
contours, hierarchy = cv2.findContours(binary, cv2.RETR_TREE,
                                       cv2.CHAIN_APPROX_SIMPLE)
# Draw every contour in green.
img_contours = img.copy()
cv2.drawContours(img_contours, contours, -1, (0, 255, 0), 3)

# Per-contour geometric features, annotated onto a copy of the image.
img_analysis = img.copy()
for i, contour in enumerate(contours):
    area = cv2.contourArea(contour)
    perimeter = cv2.arcLength(contour, True)
    # Polygon approximation within 2% of the perimeter.
    epsilon = 0.02 * perimeter
    approx = cv2.approxPolyDP(contour, epsilon, True)
    hull = cv2.convexHull(contour)
    # Axis-aligned bounding rectangle.
    x, y, w, h = cv2.boundingRect(contour)
    # Minimum-area (rotated) rectangle.
    rect = cv2.minAreaRect(contour)
    box = cv2.boxPoints(rect)
    box = np.intp(box)  # np.int0 was removed in NumPy 2.0
    # Minimum enclosing circle.
    (x_circle, y_circle), radius = cv2.minEnclosingCircle(contour)
    center_circle = (int(x_circle), int(y_circle))
    radius = int(radius)
    # Ellipse fitting needs at least 5 points.
    if len(contour) >= 5:
        ellipse = cv2.fitEllipse(contour)
    # Shape descriptors (guarding against division by zero):
    # circularity is 1.0 for a perfect circle,
    circularity = 4 * np.pi * area / (perimeter * perimeter) if perimeter > 0 else 0
    # aspect ratio of the axis-aligned bounding box,
    aspect_ratio = float(w) / h if h > 0 else 0
    # solidity = contour area relative to its convex hull area.
    hull_area = cv2.contourArea(hull)
    solidity = float(area) / hull_area if hull_area > 0 else 0
    # Annotate the image with the index and the fitted primitives.
    cv2.putText(img_analysis, f'C{i}', (x, y-10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
    cv2.rectangle(img_analysis, (x, y), (x+w, y+h), (0, 255, 0), 2)
    cv2.drawContours(img_analysis, [box], 0, (0, 0, 255), 2)
    cv2.circle(img_analysis, center_circle, radius, (255, 0, 0), 2)
    cv2.drawContours(img_analysis, [hull], 0, (255, 255, 0), 2)
    # Report the measurements on stdout.
    print(f"輪廓 {i}:")
    print(f" 面積: {area:.2f}")
    print(f" 周長: {perimeter:.2f}")
    print(f" 圓度: {circularity:.3f}")
    print(f" 伸長度: {aspect_ratio:.3f}")
    print(f" 實心度: {solidity:.3f}")
    print(f" 近似頂點數: {len(approx)}")
    print()
# 輪廓層次結構分析
img_hierarchy = img.copy()
# 繪製輪廓並顯示層次結構
for i, (contour, hier) in enumerate(zip(contours, hierarchy[0])):
# 根據層次深度選擇顏色
depth = 0
current_hier = hier
while current_hier[3] != -1:
depth += 1
current_hier = hierarchy[0][current_hier[3]]
# 為不同深度的輪廓分配不同顏色
color = (
int(255 * (depth % 3 == 0)),
int(255 * (depth % 3 == 1)),
int(255 * (depth % 3 == 2))
)
cv2.drawContours(img_hierarchy, [contour], -1, color, 2)
# 計算中心點
M = cv2.moments(contour)
if M['m00'] != 0:
cx = int(M['m10'] / M['m00'])
cy = int(M['m01'] / M['m00'])
cv2.putText(img_hierarchy, f'{i}({depth})', (cx-20, cy),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
# 顯示輪廓分析結果
plt.figure(figsize=(15, 10))
plt.subplot(2, 3, 1)
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.title('原始影像')
plt.axis('off')
plt.subplot(2, 3, 2)
plt.imshow(binary, cmap='gray')
plt.title('二值化影像')
plt.axis('off')
plt.subplot(2, 3, 3)
plt.imshow(cv2.cvtColor(img_contours, cv2.COLOR_BGR2RGB))
plt.title(f'檢測到{len(contours)}個輪廓')
plt.axis('off')
plt.subplot(2, 3, 4)
plt.imshow(cv2.cvtColor(img_analysis, cv2.COLOR_BGR2RGB))
plt.title('輪廓特徵分析')
plt.axis('off')
plt.subplot(2, 3, 5)
plt.imshow(cv2.cvtColor(img_hierarchy, cv2.COLOR_BGR2RGB))
plt.title('輪廓層次結構')
plt.axis('off')
plt.tight_layout()
plt.show()
第三部分:OpenCV高級應用與實戰
第7章 物件檢測與追蹤
7.1 傳統物件檢測方法
python
# 傳統物件檢測方法
# 1. 模板匹配
def template_matching_demo():
    """Locate a template in a scene with all six matching modes.

    Returns:
        A list of (method_name, annotated_bgr_image) pairs, one per
        cv2 template-matching method.
    """
    img = cv2.imread('scene.jpg', cv2.IMREAD_GRAYSCALE)
    template = cv2.imread('template.jpg', cv2.IMREAD_GRAYSCALE)
    w, h = template.shape[::-1]
    # Name/constant pairs instead of eval() on strings: eval is a
    # security smell and needless indirection — the constants are
    # known statically.
    methods = [
        ('cv2.TM_CCOEFF', cv2.TM_CCOEFF),
        ('cv2.TM_CCOEFF_NORMED', cv2.TM_CCOEFF_NORMED),
        ('cv2.TM_CCORR', cv2.TM_CCORR),
        ('cv2.TM_CCORR_NORMED', cv2.TM_CCORR_NORMED),
        ('cv2.TM_SQDIFF', cv2.TM_SQDIFF),
        ('cv2.TM_SQDIFF_NORMED', cv2.TM_SQDIFF_NORMED),
    ]
    results = []
    for meth, method in methods:
        # Slide the template over the scene and score each position.
        res = cv2.matchTemplate(img, template, method)
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
        # SQDIFF variants measure difference, so the best match is the
        # minimum; every other mode measures similarity (maximum).
        if method in (cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED):
            top_left = min_loc
        else:
            top_left = max_loc
        bottom_right = (top_left[0] + w, top_left[1] + h)
        # Draw the best-match rectangle on a colour copy.
        img_display = cv2.cvtColor(img.copy(), cv2.COLOR_GRAY2BGR)
        cv2.rectangle(img_display, top_left, bottom_right, (0, 255, 0), 2)
        results.append((meth, img_display))
    return results
# 2. 特徵匹配物件檢測
def feature_matching_object_detection():
    """Find a known object in a scene via ORB matching + homography.

    Returns:
        (match_visualization, scene_with_object_outline) — the second
        image has the object's projected quadrilateral drawn in green
        when a homography could be estimated.
    """
    obj_gray = cv2.imread('object.jpg', cv2.IMREAD_GRAYSCALE)    # object image
    scene_gray = cv2.imread('scene.jpg', cv2.IMREAD_GRAYSCALE)   # scene image
    orb = cv2.ORB_create(1000)
    kp1, des1 = orb.detectAndCompute(obj_gray, None)
    kp2, des2 = orb.detectAndCompute(scene_gray, None)
    # Hamming distance is the right metric for ORB's binary descriptors;
    # crossCheck keeps only mutually-best matches.
    matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    matches = sorted(matcher.match(des1, des2), key=lambda m: m.distance)
    # Visualize the 50 strongest matches.
    img_matches = cv2.drawMatches(obj_gray, kp1, scene_gray, kp2,
                                  matches[:50], None,
                                  flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
    scene_color = cv2.cvtColor(scene_gray, cv2.COLOR_GRAY2BGR)
    # A homography needs at least 4 point correspondences.
    if len(matches) >= 4:
        src_pts = np.float32([kp1[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
        dst_pts = np.float32([kp2[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)
        # RANSAC rejects outlier correspondences while fitting.
        M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
        if M is not None:
            h, w = obj_gray.shape
            object_corners = np.float32([[0, 0], [0, h-1],
                                         [w-1, h-1], [w-1, 0]]).reshape(-1, 1, 2)
            # Project the object's corners into the scene and outline them.
            projected = cv2.perspectiveTransform(object_corners, M)
            scene_color = cv2.polylines(scene_color, [np.int32(projected)],
                                        True, (0, 255, 0), 3, cv2.LINE_AA)
    return img_matches, scene_color
# 執行物件檢測
template_results = template_matching_demo()
feature_matches, object_detected = feature_matching_object_detection()
# 顯示結果
plt.figure(figsize=(20, 12))
# 顯示模板匹配結果
for i, (title, img_t) in enumerate(template_results[:3], 1):
plt.subplot(3, 4, i)
plt.imshow(cv2.cvtColor(img_t, cv2.COLOR_BGR2RGB))
plt.title(title[4:])
plt.axis('off')
for i, (title, img_t) in enumerate(template_results[3:], 4):
plt.subplot(3, 4, i)
plt.imshow(cv2.cvtColor(img_t, cv2.COLOR_BGR2RGB))
plt.title(title[4:])
plt.axis('off')
# 顯示特徵匹配結果
plt.subplot(3, 4, 9)
plt.imshow(feature_matches, cmap='gray')
plt.title('特徵匹配')
plt.axis('off')
plt.subplot(3, 4, 10)
plt.imshow(cv2.cvtColor(object_detected, cv2.COLOR_BGR2RGB))
plt.title('物件檢測結果')
plt.axis('off')
plt.tight_layout()
plt.show()
7.2 基於Haar Cascade的人臉檢測
python
# Haar Cascade人臉檢測
def haar_cascade_face_detection():
    """Detect faces, eyes and smiles with Haar cascade classifiers.

    Returns:
        (annotated_image, comparison_results) where comparison_results
        is a list of (label, image) pairs showing how different
        scaleFactor / minNeighbors settings change the detection count.
    """
    # Pre-trained cascade XML files ship with opencv-python.
    cascade_dir = cv2.data.haarcascades
    face_cascade = cv2.CascadeClassifier(
        cascade_dir + 'haarcascade_frontalface_default.xml'
    )
    eye_cascade = cv2.CascadeClassifier(cascade_dir + 'haarcascade_eye.xml')
    smile_cascade = cv2.CascadeClassifier(cascade_dir + 'haarcascade_smile.xml')
    img = cv2.imread('group_photo.jpg')
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(
        gray,
        scaleFactor=1.1,
        minNeighbors=5,
        minSize=(30, 30),
        flags=cv2.CASCADE_SCALE_IMAGE
    )
    print(f"檢測到 {len(faces)} 張人臉")
    img_faces = img.copy()
    for (x, y, w, h) in faces:
        cv2.rectangle(img_faces, (x, y), (x+w, y+h), (255, 0, 0), 2)
        # Search for eyes and smiles only inside the face rectangle:
        # much faster and far fewer false positives than the full frame.
        roi_gray = gray[y:y+h, x:x+w]
        roi_color = img_faces[y:y+h, x:x+w]
        for (ex, ey, ew, eh) in eye_cascade.detectMultiScale(roi_gray, 1.1, 3):
            cv2.rectangle(roi_color, (ex, ey), (ex+ew, ey+eh), (0, 255, 0), 2)
        # High scaleFactor/minNeighbors keep only confident smiles.
        for (sx, sy, sw, sh) in smile_cascade.detectMultiScale(roi_gray, 1.8, 20):
            cv2.rectangle(roi_color, (sx, sy), (sx+sw, sy+sh), (0, 0, 255), 2)
    # Sweep the detector parameters to show their effect.
    comparison_results = []
    for scale in (1.1, 1.2, 1.3):
        for neighbors in (3, 5, 7):
            faces_test = face_cascade.detectMultiScale(
                gray,
                scaleFactor=scale,
                minNeighbors=neighbors,
                minSize=(30, 30)
            )
            img_test = img.copy()
            for (x, y, w, h) in faces_test:
                cv2.rectangle(img_test, (x, y), (x+w, y+h), (0, 255, 0), 2)
            comparison_results.append((
                f"scale={scale}, neighbors={neighbors}\n檢測到 {len(faces_test)} 張人臉",
                img_test
            ))
    return img_faces, comparison_results
# 執行人臉檢測
img_faces, comparisons = haar_cascade_face_detection()
# 顯示結果
plt.figure(figsize=(15, 12))
plt.subplot(3, 3, 1)
plt.imshow(cv2.cvtColor(img_faces, cv2.COLOR_BGR2RGB))
plt.title('完整人臉檢測(含眼睛和微笑)')
plt.axis('off')
for i, (title, img_c) in enumerate(comparisons, 2):
plt.subplot(3, 3, i)
plt.imshow(cv2.cvtColor(img_c, cv2.COLOR_BGR2RGB))
plt.title(title, fontsize=9)
plt.axis('off')
plt.tight_layout()
plt.show()
7.3 物件追蹤演算法
python
# 物件追蹤演算法比較
def object_tracking_comparison(video_path='test_video.mp4'):
    """Compare seven OpenCV legacy trackers on the same video ROI.

    The user draws a bounding box on the first frame; every tracker is
    initialised on it and updated frame by frame (up to 100 frames).
    Shows sampled frames, prints per-tracker success statistics, and
    returns them as a dict keyed by tracker name.
    """
    tracker_types = ['BOOSTING', 'MIL', 'KCF', 'TLD',
                     'MEDIANFLOW', 'MOSSE', 'CSRT']
    # Factory callables from the legacy module (OpenCV >= 4.5 moved the
    # classic trackers there).
    tracker_dict = {
        'BOOSTING': cv2.legacy.TrackerBoosting_create,
        'MIL': cv2.legacy.TrackerMIL_create,
        'KCF': cv2.legacy.TrackerKCF_create,
        'TLD': cv2.legacy.TrackerTLD_create,
        'MEDIANFLOW': cv2.legacy.TrackerMedianFlow_create,
        'MOSSE': cv2.legacy.TrackerMOSSE_create,
        'CSRT': cv2.legacy.TrackerCSRT_create
    }
    cap = cv2.VideoCapture(video_path)
    ret, frame = cap.read()
    if not ret:
        print("無法讀取影片")
        return
    # Let the user draw the initial bounding box interactively.
    bbox = cv2.selectROI("選擇追蹤物件", frame, False)
    cv2.destroyWindow("選擇追蹤物件")
    # One tracker instance per algorithm, all starting from the same box.
    trackers = {}
    tracker_results = {}
    for tracker_type in tracker_types:
        tracker = tracker_dict[tracker_type]()
        trackers[tracker_type] = tracker
        tracker.init(frame, bbox)
        tracker_results[tracker_type] = []
    frames = [frame.copy()]
    colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255),
              (255, 255, 0), (255, 0, 255), (0, 255, 255), (128, 128, 128)]
    frame_count = 0
    max_frames = 100  # cap the amount of work for the demo
    while frame_count < max_frames:
        ret, frame = cap.read()
        if not ret:
            break
        frame_count += 1
        for (tracker_type, tracker), color in zip(trackers.items(), colors):
            success, bbox = tracker.update(frame)
            if success:
                # Tracking succeeded: draw this tracker's box in its
                # own colour, labelled with the tracker name.
                p1 = (int(bbox[0]), int(bbox[1]))
                p2 = (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3]))
                cv2.rectangle(frame, p1, p2, color, 2, 1)
                cv2.putText(frame, tracker_type, (p1[0], p1[1]-10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                tracker_results[tracker_type].append({
                    'frame': frame_count,
                    'bbox': bbox,
                    'success': True
                })
            else:
                # Tracking failed on this frame.
                cv2.putText(frame, "追蹤失敗", (100, 80),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)
                tracker_results[tracker_type].append({
                    'frame': frame_count,
                    'success': False
                })
        # Keep one annotated frame out of every ten for display, and
        # report progress at the same cadence.
        if frame_count % 10 == 0:
            frames.append(frame.copy())
            print(f"處理第 {frame_count} 幀...")
    cap.release()
    # Per-tracker success-rate statistics.
    performance_stats = {}
    for tracker_type in tracker_types:
        results = tracker_results[tracker_type]
        success_count = sum(1 for r in results if r['success'])
        success_rate = success_count / len(results) * 100 if results else 0
        performance_stats[tracker_type] = {
            'success_rate': success_rate,
            'total_frames': len(results),
            'success_frames': success_count
        }
    # Show the sampled frames.
    plt.figure(figsize=(15, 10))
    for i, frame in enumerate(frames[:min(6, len(frames))]):
        plt.subplot(2, 3, i+1)
        plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        plt.title(f'第 {i*10} 幀')
        plt.axis('off')
    plt.tight_layout()
    plt.show()
    # Print the comparison table.
    print("\n追蹤器性能比較:")
    print("=" * 60)
    for tracker_type, stats in performance_stats.items():
        print(f"{tracker_type:12s} | 成功率: {stats['success_rate']:6.2f}% | "
              f"成功幀數: {stats['success_frames']:3d}/{stats['total_frames']:3d}")
    return performance_stats
# 執行追蹤比較(如果沒有影片文件,可以跳過這部分)
try:
# 注意:需要準備測試影片或使用相機
# performance_stats = object_tracking_comparison()
pass
except Exception as e:
print(f"追蹤演示跳過:{e}")
第8章 深度學習與OpenCV
8.1 使用OpenCV加載深度學習模型
python
# 使用OpenCV加載深度學習模型進行物件檢測
def deep_learning_object_detection():
    """Run YOLOv3 object detection through cv2.dnn.

    Requires coco.names, yolov3.cfg, yolov3.weights and
    street_scene.jpg in the working directory.

    Returns:
        (annotated_image, number_of_detections_kept_after_NMS)
    """
    # Class labels, one per line.
    class_names = []
    with open('coco.names', 'r') as f:
        class_names = [line.strip() for line in f.readlines()]
    model_config = 'yolov3.cfg'
    model_weights = 'yolov3.weights'
    net = cv2.dnn.readNetFromDarknet(model_config, model_weights)
    # Prefer CUDA when the build supports it; fall back to CPU
    # (narrowed from a bare except, which would also hide real bugs).
    try:
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
        net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
        print("使用CUDA加速")
    except cv2.error:
        print("使用CPU")
    img = cv2.imread('street_scene.jpg')
    height, width = img.shape[:2]
    # YOLO expects a 416x416 RGB blob scaled to [0, 1].
    blob = cv2.dnn.blobFromImage(img, 1/255.0, (416, 416),
                                 swapRB=True, crop=False)
    net.setInput(blob)
    layer_names = net.getLayerNames()
    # getUnconnectedOutLayers() returns a flat int array in OpenCV 4.x
    # but an Nx1 array in 3.x; ravel() handles both. The original
    # `layer_names[i[0] - 1]` crashes on 4.x ("invalid index to scalar").
    out_ids = np.array(net.getUnconnectedOutLayers()).ravel()
    output_layers = [layer_names[int(i) - 1] for i in out_ids]
    outputs = net.forward(output_layers)
    boxes = []
    confidences = []
    class_ids = []
    for output in outputs:
        for detection in output:
            # detection = (cx, cy, w, h, objectness, class scores...)
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:  # confidence threshold
                # Box coordinates are normalised to [0, 1]; convert to
                # pixel-space top-left corner plus width/height.
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)
    # Non-maximum suppression removes overlapping duplicate boxes.
    indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    colors = np.random.uniform(0, 255, size=(len(class_names), 3))
    if len(indices) > 0:
        for i in np.array(indices).ravel():
            x, y, w, h = boxes[i]
            label = str(class_names[class_ids[i]])
            confidence = confidences[i]
            color = colors[class_ids[i]]
            cv2.rectangle(img, (x, y), (x+w, y+h), color, 2)
            text = f"{label}: {confidence:.2f}"
            cv2.putText(img, text, (x, y-5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
    return img, len(indices)
# Face detection plus per-face emotion classification.
def face_recognition_emotion():
    """Detect faces with an SSD detector and classify each face's emotion.

    Returns the input image annotated with face boxes and emotion labels.
    """
    # SSD face detector (Caffe) and emotion classifier (TensorFlow).
    face_net = cv2.dnn.readNetFromCaffe(
        'deploy.prototxt', 'res10_300x300_ssd_iter_140000.caffemodel'
    )
    emotion_labels = ['Angry', 'Disgust', 'Fear', 'Happy',
                      'Neutral', 'Sad', 'Surprise']
    emotion_net = cv2.dnn.readNetFromTensorflow(
        'emotion_model.pb', 'emotion_model.pbtxt'
    )

    img = cv2.imread('people.jpg')
    img_h, img_w = img.shape[:2]

    # The SSD model expects a 300x300 mean-subtracted blob.
    input_blob = cv2.dnn.blobFromImage(cv2.resize(img, (300, 300)), 1.0,
                                       (300, 300), (104.0, 177.0, 123.0))
    face_net.setInput(input_blob)
    detections = face_net.forward()

    for idx in range(detections.shape[2]):
        score = detections[0, 0, idx, 2]
        if score <= 0.5:  # confidence threshold
            continue

        # Scale the normalized box back to pixels and clamp to the image.
        scaled = detections[0, 0, idx, 3:7] * np.array([img_w, img_h,
                                                        img_w, img_h])
        x1, y1, x2, y2 = scaled.astype("int")
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(img_w, x2), min(img_h, y2)

        # Extract the face region; skip degenerate boxes.
        face_roi = img[y1:y2, x1:x2]
        if face_roi.size == 0:
            continue

        # Classify the cropped face.
        face_blob = cv2.dnn.blobFromImage(face_roi, 1.0, (64, 64),
                                          (0, 0, 0), swapRB=True, crop=False)
        emotion_net.setInput(face_blob)
        preds = emotion_net.forward()
        emotion_label = emotion_labels[np.argmax(preds)]
        emotion_confidence = np.max(preds)

        # Draw the face box and the predicted emotion above (or below) it.
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        text = f"{emotion_label}: {emotion_confidence:.2f}"
        text_y = y1 - 10 if y1 - 10 > 10 else y1 + 10
        cv2.putText(img, text, (x1, text_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 255, 0), 2)
    return img
# Run the deep-learning examples. Both need pre-trained model files and
# input images on disk, so any failure is reported and the demo skipped.
try:
    # Object detection
    detected_img, num_objects = deep_learning_object_detection()
    # Face detection + emotion classification
    emotion_img = face_recognition_emotion()
    # Show both results side by side (convert BGR -> RGB for Matplotlib).
    plt.figure(figsize=(15, 7))
    plt.subplot(1, 2, 1)
    plt.imshow(cv2.cvtColor(detected_img, cv2.COLOR_BGR2RGB))
    plt.title(f'YOLO物件檢測 (檢測到 {num_objects} 個物件)')
    plt.axis('off')
    plt.subplot(1, 2, 2)
    plt.imshow(cv2.cvtColor(emotion_img, cv2.COLOR_BGR2RGB))
    plt.title('人臉檢測與情感分析')
    plt.axis('off')
    plt.tight_layout()
    plt.show()
except Exception as e:
    print(f"深度學習範例跳過:{e}")
    print("注意:需要下載預訓練模型文件才能運行此示例")
8.2 實時影像處理與分析
python
# Real-time video processing with switchable filter modes.
class RealTimeCVProcessor:
    """Reads frames from a camera and applies a user-selectable filter.

    Keys '1'-'7' switch between the registered filters; 'q' quits.
    """

    def __init__(self, camera_id=0):
        self.cap = cv2.VideoCapture(camera_id)
        self.running = False
        self.mode = 'original'  # currently active filter name
        # Load the Haar face cascade once; the original code reloaded it
        # from disk on every frame inside apply_face_detect.
        self.face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        )
        self.filters = {
            'original': self.apply_original,
            'gray': self.apply_gray,
            'edges': self.apply_edges,
            'blur': self.apply_blur,
            'face_detect': self.apply_face_detect,
            'motion_detect': self.apply_motion_detect,
            'color_track': self.apply_color_track
        }

    def apply_original(self, frame):
        """Pass the frame through unchanged."""
        return frame

    def apply_gray(self, frame):
        """Convert the frame to single-channel grayscale."""
        return cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    def apply_edges(self, frame):
        """Canny edge map, converted back to BGR so overlays stay colored."""
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        edges = cv2.Canny(gray, 100, 200)
        return cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)

    def apply_blur(self, frame):
        """Apply a 15x15 Gaussian blur."""
        return cv2.GaussianBlur(frame, (15, 15), 0)

    def apply_face_detect(self, frame):
        """Draw a blue rectangle around each detected face."""
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = self.face_cascade.detectMultiScale(gray, 1.1, 4)
        for (x, y, w, h) in faces:
            cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
        return frame

    def apply_motion_detect(self, frame):
        """Highlight regions that changed since the previous frame."""
        # First call: remember the frame, nothing to diff against yet.
        if not hasattr(self, 'prev_gray'):
            self.prev_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            return frame
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        frame_diff = cv2.absdiff(self.prev_gray, gray)
        _, motion_mask = cv2.threshold(frame_diff, 25, 255, cv2.THRESH_BINARY)
        # Outline each sufficiently large changed region.
        contours, _ = cv2.findContours(motion_mask, cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)
        for contour in contours:
            if cv2.contourArea(contour) > 500:  # ignore tiny noise regions
                x, y, w, h = cv2.boundingRect(contour)
                cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
        self.prev_gray = gray
        return frame

    def apply_color_track(self, frame):
        """Track red objects via an HSV mask (red wraps around hue 0/180)."""
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        # Red spans both ends of the hue circle, so two ranges are needed.
        lower_red1 = np.array([0, 100, 100])
        upper_red1 = np.array([10, 255, 255])
        lower_red2 = np.array([160, 100, 100])
        upper_red2 = np.array([180, 255, 255])
        mask1 = cv2.inRange(hsv, lower_red1, upper_red1)
        mask2 = cv2.inRange(hsv, lower_red2, upper_red2)
        mask = cv2.bitwise_or(mask1, mask2)
        # Open then close: drop speckles, then fill small holes.
        kernel = np.ones((5, 5), np.uint8)
        mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
        mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)
        for contour in contours:
            if cv2.contourArea(contour) > 500:
                x, y, w, h = cv2.boundingRect(contour)
                cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 0, 255), 2)
                cv2.putText(frame, 'Red Object', (x, y-10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
        return frame

    def set_mode(self, mode):
        """Switch the active filter; unknown names are ignored."""
        if mode in self.filters:
            self.mode = mode

    def run(self):
        """Main capture loop: read, filter, display, handle keys."""
        self.running = True
        print("實時影像處理啟動")
        print("按鍵說明:")
        print(" '1' - 原始影像")
        print(" '2' - 灰度影像")
        print(" '3' - 邊緣檢測")
        print(" '4' - 模糊效果")
        print(" '5' - 人臉檢測")
        print(" '6' - 運動檢測")
        print(" '7' - 顏色追蹤")
        print(" 'q' - 退出")
        # Key -> mode dispatch table replaces the original elif ladder.
        key_modes = {
            ord('1'): 'original', ord('2'): 'gray',
            ord('3'): 'edges', ord('4'): 'blur',
            ord('5'): 'face_detect', ord('6'): 'motion_detect',
            ord('7'): 'color_track',
        }
        while self.running:
            ret, frame = self.cap.read()
            if not ret:
                break
            # Work on a copy so filters never mutate the captured frame.
            processed = self.filters[self.mode](frame.copy())
            cv2.putText(processed, f"Mode: {self.mode}", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.imshow('Real-Time CV Processing', processed)
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
            elif key in key_modes:
                self.set_mode(key_modes[key])
        self.cleanup()

    def cleanup(self):
        """Release the camera and close all OpenCV windows."""
        self.running = False
        self.cap.release()
        cv2.destroyAllWindows()
        print("實時影像處理結束")
# Launch the real-time processing demo.
# NOTE: requires a connected camera; any failure is reported and skipped.
try:
    processor = RealTimeCVProcessor()
    processor.run()
except Exception as e:
    print(f"實時處理演示跳過:{e}")
第9章 專案實戰:完整的電腦視覺應用
9.1 智慧停車場管理系統
python
# Smart parking-lot management system.
class SmartParkingSystem:
    """Monitors user-defined parking spots in a video stream.

    Occupancy is estimated from the edge density inside each spot: a
    parked car produces many edges, an empty patch of asphalt few.
    """

    def __init__(self, video_source=0):
        self.cap = cv2.VideoCapture(video_source)
        self.parking_spots = []   # list of ((x1, y1), (x2, y2)) corner pairs
        self.spot_status = {}     # spot index -> occupied flag
        self.spot_counter = 0     # kept for interface compatibility

    def define_parking_spots(self, frame):
        """Let the user draw parking spots with the mouse.

        Drag to define a spot; 's' saves, 'r' resets, 'q' aborts.
        Returns the list of defined spots.
        """
        print("點擊並拖曳定義停車位,按's'保存,'r'重置,'q'退出")
        spots = []
        current_spot = []

        def mouse_callback(event, x, y, flags, param):
            # Press records the anchor corner; release records the opposite
            # corner and commits the spot. (The original only appended on
            # press, so the len == 2 check never passed and no spot was
            # ever saved.)
            if event == cv2.EVENT_LBUTTONDOWN:
                current_spot.append((x, y))
            elif event == cv2.EVENT_LBUTTONUP:
                current_spot.append((x, y))
                if len(current_spot) == 2:
                    spots.append(tuple(current_spot))
                current_spot.clear()

        clone = frame.copy()
        cv2.namedWindow("Define Parking Spots")
        cv2.setMouseCallback("Define Parking Spots", mouse_callback)
        while True:
            display = clone.copy()
            # Draw every committed spot.
            for i, (pt1, pt2) in enumerate(spots):
                cv2.rectangle(display, pt1, pt2, (0, 255, 0), 2)
                cv2.putText(display, f"Spot {i+1}",
                            (pt1[0], pt1[1]-10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            # Mark the anchor point of a spot currently being dragged.
            if len(current_spot) == 1:
                cv2.rectangle(display, current_spot[0],
                              (current_spot[0][0], current_spot[0][1]),
                              (255, 0, 0), 2)
            cv2.imshow("Define Parking Spots", display)
            key = cv2.waitKey(1) & 0xFF
            if key == ord('s'):
                self.parking_spots = spots
                print(f"保存了 {len(spots)} 個停車位")
                break
            elif key == ord('r'):
                # clear() in place instead of rebinding: the mouse callback
                # closed over these exact list objects, so rebinding would
                # leave it appending to lists the loop no longer reads.
                spots.clear()
                current_spot.clear()
                clone = frame.copy()
                print("重置所有停車位")
            elif key == ord('q'):
                break
        cv2.destroyWindow("Define Parking Spots")
        return spots

    def check_parking_spot(self, frame, spot):
        """Return True when the spot looks occupied.

        Empty spots have few edges; a parked car produces many.
        """
        (x1, y1), (x2, y2) = spot
        # Normalize so the rectangle is valid regardless of which corner
        # the user dragged from (reversed drags produced an empty ROI).
        x1, x2 = sorted((x1, x2))
        y1, y2 = sorted((y1, y2))
        roi = frame[y1:y2, x1:x2]
        if roi.size == 0:
            return False
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        edges = cv2.Canny(blurred, 50, 150)
        # Fraction of edge pixels inside the spot.
        edge_ratio = np.sum(edges > 0) / edges.size
        return edge_ratio > 0.05  # tunable threshold

    def process_video(self):
        """Main monitoring loop over the video stream."""
        from datetime import datetime  # hoisted out of the per-frame loop

        print("智慧停車場系統啟動中...")
        # Use the first frame to let the user define the spots.
        ret, frame = self.cap.read()
        if not ret:
            print("無法讀取影片")
            return
        self.parking_spots = self.define_parking_spots(frame)
        if not self.parking_spots:
            print("未定義停車位,系統退出")
            return
        self.spot_status = {i: False for i in range(len(self.parking_spots))}
        print(f"開始監控 {len(self.parking_spots)} 個停車位...")
        print("按'q'退出監控")
        while True:
            ret, frame = self.cap.read()
            if not ret:
                break
            display = frame.copy()
            available_spots = 0
            # Classify and draw every spot.
            for i, spot in enumerate(self.parking_spots):
                is_occupied = self.check_parking_spot(frame, spot)
                self.spot_status[i] = is_occupied
                (x1, y1), (x2, y2) = spot
                color = (0, 0, 255) if is_occupied else (0, 255, 0)
                cv2.rectangle(display, (x1, y1), (x2, y2), color, 2)
                status = "Occupied" if is_occupied else "Available"
                cv2.putText(display, f"Spot {i+1}: {status}",
                            (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX,
                            0.5, color, 2)
                if not is_occupied:
                    available_spots += 1
            # Overall availability counter.
            cv2.putText(display, f"Available: {available_spots}/{len(self.parking_spots)}",
                        (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
            # Timestamp in the bottom-left corner.
            current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            cv2.putText(display, current_time,
                        (10, display.shape[0] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
            cv2.imshow("Smart Parking System", display)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        self.cleanup()

    def cleanup(self):
        """Release the video source and close windows."""
        self.cap.release()
        cv2.destroyAllWindows()
        print("智慧停車場系統已關閉")
# Launch the smart-parking demo.
# NOTE: requires a camera or a video file named 'parking_lot_video.mp4';
# any failure is reported and the demo skipped.
try:
    parking_system = SmartParkingSystem('parking_lot_video.mp4')
    parking_system.process_video()
except Exception as e:
    print(f"停車場系統演示跳過:{e}")
9.2 增強現實(AR)應用
python
# Marker-based augmented reality demo.
class AugmentedRealityApp:
    """Detects a planar marker via ORB features and renders simple
    wireframe 3D objects on top of it.
    """

    def __init__(self, marker_image='ar_marker.jpg'):
        self.marker_image = cv2.imread(marker_image, cv2.IMREAD_GRAYSCALE)
        if self.marker_image is None:
            # Fail fast: every later step needs the marker's features.
            raise FileNotFoundError(f"無法載入標記圖像: {marker_image}")
        self.ar_objects = []  # wireframe object templates
        self.cap = cv2.VideoCapture(0)
        # ORB detector and precomputed marker features.
        self.orb = cv2.ORB_create(1000)
        self.marker_kp, self.marker_des = self.orb.detectAndCompute(
            self.marker_image, None
        )
        # Brute-force matcher; Hamming distance suits ORB's binary descriptors.
        self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
        self.load_ar_objects()

    def load_ar_objects(self):
        """Populate the wireframe object templates (unit-cube coordinates)."""
        self.ar_objects = [
            {
                'name': 'cube',
                'points': np.float32([
                    [0, 0, 0], [1, 0, 0], [1, 1, 0], [0, 1, 0],
                    [0, 0, -1], [1, 0, -1], [1, 1, -1], [0, 1, -1]
                ]),
                'edges': [(0, 1), (1, 2), (2, 3), (3, 0),
                          (4, 5), (5, 6), (6, 7), (7, 4),
                          (0, 4), (1, 5), (2, 6), (3, 7)],
                'color': (0, 255, 0)
            },
            {
                'name': 'pyramid',
                'points': np.float32([
                    [0, 0, 0], [1, 0, 0], [1, 1, 0], [0, 1, 0],
                    [0.5, 0.5, 1]
                ]),
                'edges': [(0, 1), (1, 2), (2, 3), (3, 0),
                          (0, 4), (1, 4), (2, 4), (3, 4)],
                'color': (255, 0, 0)
            }
        ]

    def detect_marker(self, frame):
        """Locate the marker in *frame*.

        Returns (homography, frame keypoints, matches), or
        (None, None, None) when the marker cannot be found reliably.
        """
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        kp, des = self.orb.detectAndCompute(gray, None)
        # Guard both descriptor sets: detectAndCompute returns None when
        # no features are found (e.g. a featureless marker image).
        if des is None or self.marker_des is None or len(kp) < 4:
            return None, None, None
        matches = self.bf.match(self.marker_des, des)
        matches = sorted(matches, key=lambda x: x.distance)
        if len(matches) < 10:  # too few matches for a stable homography
            return None, None, None
        src_pts = np.float32([self.marker_kp[m.queryIdx].pt
                              for m in matches]).reshape(-1, 1, 2)
        dst_pts = np.float32([kp[m.trainIdx].pt
                              for m in matches]).reshape(-1, 1, 2)
        # RANSAC rejects outlier correspondences.
        M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
        if M is None:
            return None, None, None
        return M, kp, matches

    def draw_ar_object(self, frame, homography, ar_object):
        """Project *ar_object* onto the detected marker and draw it."""
        h, w = self.marker_image.shape
        marker_corners = np.float32([
            [0, 0], [w, 0], [w, h], [0, h]
        ]).reshape(-1, 1, 2)
        # Marker corner positions in the camera frame.
        dst_corners = cv2.perspectiveTransform(marker_corners, homography)
        # Approximate pinhole camera; a real application should use a
        # calibrated camera matrix instead.
        focal_length = frame.shape[1]
        center = (frame.shape[1]/2, frame.shape[0]/2)
        camera_matrix = np.array([
            [focal_length, 0, center[0]],
            [0, focal_length, center[1]],
            [0, 0, 1]
        ], dtype=np.float32)
        dist_coeffs = np.zeros((4, 1))  # assume no lens distortion
        # Marker's 3D points, assumed to lie in the Z=0 plane.
        marker_3d_points = np.float32([
            [0, 0, 0], [w, 0, 0], [w, h, 0], [0, h, 0]
        ])
        # Solve the PnP problem for the marker pose.
        success, rvec, tvec = cv2.solvePnP(
            marker_3d_points, dst_corners,
            camera_matrix, dist_coeffs
        )
        if not success:
            return frame
        # The object templates live in a unit cube while the marker is
        # w x h pixels; scale them to marker dimensions so the object is
        # actually visible (unscaled it would span about one pixel).
        scale = np.float32([w, h, min(w, h)])
        ar_points_3d = ar_object['points'] * scale
        ar_points_2d, _ = cv2.projectPoints(
            ar_points_3d, rvec, tvec,
            camera_matrix, dist_coeffs
        )
        ar_points_2d = np.int32(ar_points_2d).reshape(-1, 2)
        # Draw the wireframe edges, then the vertices.
        for edge in ar_object['edges']:
            pt1 = tuple(ar_points_2d[edge[0]])
            pt2 = tuple(ar_points_2d[edge[1]])
            cv2.line(frame, pt1, pt2, ar_object['color'], 2)
        for point in ar_points_2d:
            cv2.circle(frame, tuple(point), 3, (0, 0, 255), -1)
        return frame

    def run(self):
        """Capture loop: detect the marker and overlay the AR object."""
        print("增強現實應用啟動")
        print("按'q'退出")
        print("按'1', '2'切換AR物件")
        current_object_idx = 0
        while True:
            ret, frame = self.cap.read()
            if not ret:
                break
            homography, kp, matches = self.detect_marker(frame)
            if homography is not None:
                # Render the AR object BEFORE compositing the match
                # visualization: drawMatches pastes the marker image to the
                # left of the frame, which shifts all pixel coordinates and
                # would place the projected object at the wrong offset
                # (the original drew matches first, then the object).
                frame = self.draw_ar_object(
                    frame, homography,
                    self.ar_objects[current_object_idx]
                )
                frame = cv2.drawMatches(
                    self.marker_image, self.marker_kp,
                    frame, kp, matches[:20], None,
                    flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS
                )
                cv2.putText(frame,
                            f"AR Object: {self.ar_objects[current_object_idx]['name']}",
                            (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                            1, (255, 255, 255), 2)
            else:
                cv2.putText(frame, "Marker not detected",
                            (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                            1, (0, 0, 255), 2)
            cv2.imshow('Augmented Reality', frame)
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
            elif key == ord('1'):
                current_object_idx = 0
            elif key == ord('2'):
                current_object_idx = 1
        self.cleanup()

    def cleanup(self):
        """Release the camera and close windows."""
        self.cap.release()
        cv2.destroyAllWindows()
        print("增強現實應用已關閉")
# Launch the AR demo.
# NOTE: requires a camera and a marker image ('ar_marker.jpg');
# any failure is reported and the demo skipped.
try:
    ar_app = AugmentedRealityApp()
    ar_app.run()
except Exception as e:
    print(f"AR應用演示跳過:{e}")
第四部分:性能優化與最佳實踐
第10章 OpenCV性能優化技巧
10.1 性能優化策略
python
import time
import numpy as np
class OpenCVOptimizer:
    """Collection of micro-benchmarks demonstrating OpenCV/NumPy
    optimization techniques (loops vs. vectorization, interpolation
    choice, memory footprint, parallelism, CUDA).
    """

    def __init__(self):
        self.results = {}  # reserved for callers that want to stash results

    def benchmark(self, func, *args, **kwargs):
        """Run *func* once and return (result, elapsed_seconds).

        Uses time.perf_counter, which is monotonic and far finer-grained
        than time.time for timing short-running code.
        """
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        end_time = time.perf_counter()
        return result, end_time - start_time

    def optimize_loop_operations(self, image):
        """Compare per-pixel Python loops against vectorized scaling."""
        height, width = image.shape[:2]

        # Method 1: pure-Python triple loop (slowest by far).
        def slow_method(img):
            result = img.copy()
            for y in range(height):
                for x in range(width):
                    for c in range(3):
                        result[y, x, c] = min(255, img[y, x, c] * 1.5)
            return result

        # Method 2: NumPy vectorized arithmetic.
        def fast_method(img):
            result = np.clip(img.astype(np.float32) * 1.5, 0, 255)
            return result.astype(np.uint8)

        # Method 3: OpenCV built-in saturating scale (runs in C++).
        def opencv_method(img):
            return cv2.convertScaleAbs(img, alpha=1.5, beta=0)

        print("循環操作優化比較:")
        print("-" * 50)
        _, time_slow = self.benchmark(slow_method, image)
        _, time_fast = self.benchmark(fast_method, image)
        _, time_opencv = self.benchmark(opencv_method, image)
        print(f"Python循環: {time_slow:.4f} 秒")
        print(f"NumPy向量化: {time_fast:.4f} 秒 (加速 {time_slow/time_fast:.1f}x)")
        print(f"OpenCV內置: {time_opencv:.4f} 秒 (加速 {time_slow/time_opencv:.1f}x)")
        return {
            'slow': time_slow,
            'fast': time_fast,
            'opencv': time_opencv
        }

    def optimize_image_operations(self, image):
        """Compare the speed of cv2.resize interpolation methods."""
        def method_nearest(img):
            return cv2.resize(img, (800, 600), interpolation=cv2.INTER_NEAREST)

        def method_linear(img):
            return cv2.resize(img, (800, 600), interpolation=cv2.INTER_LINEAR)

        def method_cubic(img):
            return cv2.resize(img, (800, 600), interpolation=cv2.INTER_CUBIC)

        def method_area(img):
            return cv2.resize(img, (800, 600), interpolation=cv2.INTER_AREA)

        def method_lanczos(img):
            return cv2.resize(img, (800, 600), interpolation=cv2.INTER_LANCZOS4)

        methods = [
            ('NEAREST', method_nearest),
            ('LINEAR', method_linear),
            ('CUBIC', method_cubic),
            ('AREA', method_area),
            ('LANCZOS', method_lanczos)
        ]
        print("\n影像縮放插值方法比較:")
        print("-" * 50)
        times = {}
        for name, method in methods:
            _, exec_time = self.benchmark(method, image)
            times[name] = exec_time
            print(f"{name:10s}: {exec_time:.6f} 秒")
        return times

    def optimize_memory_usage(self, image):
        """Compare memory footprints of common image representations.

        sys.getsizeof is meaningful here because ndarray implements
        __sizeof__ including its data buffer.
        """
        print("\n記憶體使用優化:")
        print("-" * 50)
        import sys
        # Baseline: the original BGR uint8 image.
        mem_original = sys.getsizeof(image)
        # float32 quadruples the per-channel storage.
        image_float32 = image.astype(np.float32)
        mem_float32 = sys.getsizeof(image_float32)
        # Grayscale drops two of three channels.
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        mem_gray = sys.getsizeof(gray)
        # In-memory JPEG encoding (lossy).
        encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 90]
        _, buffer = cv2.imencode('.jpg', image, encode_param)
        mem_compressed = sys.getsizeof(buffer)
        print(f"原始BGR影像: {mem_original / 1024:.1f} KB")
        print(f"float32影像: {mem_float32 / 1024:.1f} KB")
        print(f"灰度影像: {mem_gray / 1024:.1f} KB (減少 {((mem_original-mem_gray)/mem_original*100):.1f}%)")
        print(f"JPEG壓縮: {mem_compressed / 1024:.1f} KB (減少 {((mem_original-mem_compressed)/mem_original*100):.1f}%)")
        return {
            'original': mem_original,
            'float32': mem_float32,
            'gray': mem_gray,
            'compressed': mem_compressed
        }

    @staticmethod
    def _process_single(image):
        """Gray -> blur -> Canny pipeline used by the parallel benchmark.

        Defined as a staticmethod (not a closure, as in the original) so
        multiprocessing can pickle it by qualified name — nested functions
        cannot be pickled on spawn-based platforms (Windows, macOS).
        """
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        edges = cv2.Canny(blurred, 50, 150)
        return edges

    def parallel_processing_optimization(self, images):
        """Compare a sequential pipeline against a multiprocessing pool."""
        print("\n平行處理優化:")
        print("-" * 50)
        import multiprocessing as mp

        def sequential_processing(imgs):
            return [OpenCVOptimizer._process_single(img) for img in imgs]

        def parallel_processing(imgs):
            # One worker per CPU core; the pool is cleaned up by `with`.
            with mp.Pool(processes=mp.cpu_count()) as pool:
                return pool.map(OpenCVOptimizer._process_single, imgs)

        _, time_seq = self.benchmark(sequential_processing, images)
        _, time_par = self.benchmark(parallel_processing, images)
        print(f"順序處理: {time_seq:.4f} 秒")
        print(f"平行處理: {time_par:.4f} 秒")
        print(f"加速比: {time_seq/time_par:.2f}x")
        print(f"CPU核心數: {mp.cpu_count()}")
        return {
            'sequential': time_seq,
            'parallel': time_par,
            'speedup': time_seq/time_par,
            'cpu_cores': mp.cpu_count()
        }

    def gpu_acceleration(self, image):
        """Benchmark a small CUDA pipeline when OpenCV has CUDA support."""
        print("\nGPU加速檢查:")
        print("-" * 50)
        gpu_info = {}
        try:
            count = cv2.cuda.getCudaEnabledDeviceCount()
            gpu_info['cuda_devices'] = count
            if count > 0:
                print(f"檢測到 {count} 個CUDA設備")
                for i in range(count):
                    # printCudaDeviceInfo prints directly and returns None;
                    # the original captured and printed that None.
                    cv2.cuda.printCudaDeviceInfo(i)
                # Upload once, then run the whole pipeline on the GPU.
                gpu_img = cv2.cuda_GpuMat()
                gpu_img.upload(image)
                start = time.perf_counter()
                gpu_gray = cv2.cuda.cvtColor(gpu_img, cv2.COLOR_BGR2GRAY)
                gpu_blur = cv2.cuda.createGaussianFilter(
                    cv2.CV_8UC1, cv2.CV_8UC1, (5, 5), 0
                ).apply(gpu_gray)
                sobel_x = cv2.cuda.createSobelFilter(
                    cv2.CV_8UC1, cv2.CV_8UC1, 1, 0
                ).apply(gpu_blur)
                sobel_y = cv2.cuda.createSobelFilter(
                    cv2.CV_8UC1, cv2.CV_8UC1, 0, 1
                ).apply(gpu_blur)
                result_x = sobel_x.download()
                result_y = sobel_y.download()
                gpu_time = time.perf_counter() - start
                # Equivalent CPU pipeline for comparison.
                start = time.perf_counter()
                gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                blur = cv2.GaussianBlur(gray, (5, 5), 0)
                sobelx = cv2.Sobel(blur, cv2.CV_8UC1, 1, 0, ksize=3)
                sobely = cv2.Sobel(blur, cv2.CV_8UC1, 0, 1, ksize=3)
                cpu_time = time.perf_counter() - start
                print(f"CPU處理時間: {cpu_time:.6f} 秒")
                print(f"GPU處理時間: {gpu_time:.6f} 秒")
                print(f"GPU加速比: {cpu_time/gpu_time:.2f}x")
                gpu_info['cpu_time'] = cpu_time
                gpu_info['gpu_time'] = gpu_time
                gpu_info['speedup'] = cpu_time/gpu_time
            else:
                print("未檢測到CUDA設備")
        except Exception as e:
            # cv2.cuda is absent entirely in CPU-only builds, so a broad
            # except is deliberate here (best-effort capability probe).
            print(f"GPU加速檢查失敗: {e}")
        return gpu_info
# Run the complete performance-optimization test suite.
def run_optimization_tests():
    """Run every OpenCVOptimizer benchmark and print a summary of tips.

    Returns:
        dict: raw results of each benchmark category.
    """
    # Synthetic full-HD BGR image used as the benchmark workload.
    test_image = np.random.randint(0, 256, (1080, 1920, 3), dtype=np.uint8)
    optimizer = OpenCVOptimizer()
    print("=" * 60)
    print("OpenCV性能優化測試")
    print("=" * 60)
    # 1. Loop-operation optimization (small crop: the Python loop is slow).
    loop_results = optimizer.optimize_loop_operations(test_image[:100, :100])
    # 2. Image-operation (resize interpolation) comparison.
    resize_results = optimizer.optimize_image_operations(test_image)
    # 3. Memory-usage comparison.
    memory_results = optimizer.optimize_memory_usage(test_image)
    # 4. Several image copies for the parallel-processing benchmark.
    test_images = [test_image.copy() for _ in range(10)]
    parallel_results = optimizer.parallel_processing_optimization(test_images)
    # 5. Optional CUDA benchmark.
    gpu_results = optimizer.gpu_acceleration(test_image)
    print("\n" + "=" * 60)
    print("性能優化建議總結:")
    print("=" * 60)
    print("1. 避免使用Python循環處理像素,使用NumPy向量化操作")
    print("2. 選擇適當的插值方法:")
    print(" - INTER_NEAREST: 最快,質量最低")
    print(" - INTER_LINEAR: 平衡速度和質量")
    print(" - INTER_CUBIC: 高質量,較慢")
    print(" - INTER_AREA: 縮小影像時最佳")
    print("3. 減少記憶體使用:")
    print(" - 使用灰度圖替代彩色圖")
    print(" - 使用適當的資料類型")
    print(" - 考慮影像壓縮")
    print("4. 利用平行處理處理多個影像")
    print("5. 如有GPU,啟用CUDA加速")
    return {
        'loop': loop_results,
        'resize': resize_results,
        'memory': memory_results,
        'parallel': parallel_results,
        'gpu': gpu_results
    }


# Run the suite at import time, matching the tutorial's top-to-bottom flow.
optimization_results = run_optimization_tests()
第11章 最佳實踐與常見問題
11.1 最佳實踐指南
python
# OpenCV best-practices catalogue.
class OpenCVBestPractices:
    """Static catalogue of OpenCV best practices.

    Each entry is a dict with 'title', 'bad_practice', 'good_practice'
    and 'reason' keys; the code samples are stored as string literals
    and printed verbatim by display_all_best_practices().
    """

    @staticmethod
    def image_io_best_practices():
        """Return best practices for image I/O."""
        practices = [
            {
                'title': '檢查影像是否成功載入',
                'bad_practice': 'img = cv2.imread("image.jpg")',
                'good_practice': '''img = cv2.imread("image.jpg")
if img is None:
print("無法載入影像")
# 處理錯誤情況
else:
# 繼續處理影像''',
                'reason': '直接使用可能為None的影像會導致後續操作失敗'
            },
            {
                'title': '指定影像讀取模式',
                'bad_practice': 'img = cv2.imread("image.jpg")',
                'good_practice': '''# 根據需求選擇讀取模式
img_color = cv2.imread("image.jpg", cv2.IMREAD_COLOR) # 彩色影像
img_grayscale = cv2.imread("image.jpg", cv2.IMREAD_GRAYSCALE) # 灰度影像
img_unchanged = cv2.imread("image.jpg", cv2.IMREAD_UNCHANGED) # 包含alpha通道''',
                'reason': '明確指定模式可以避免意外行為並提高效率'
            },
            {
                'title': '高效儲存影像',
                'bad_practice': 'cv2.imwrite("output.jpg", img)',
                'good_practice': '''# 控制JPEG質量
cv2.imwrite("output.jpg", img, [cv2.IMWRITE_JPEG_QUALITY, 95])
# 控制PNG壓縮級別
cv2.imwrite("output.png", img, [cv2.IMWRITE_PNG_COMPRESSION, 9])''',
                'reason': '調整壓縮參數可以在質量和文件大小之間取得平衡'
            }
        ]
        return practices

    @staticmethod
    def memory_management_best_practices():
        """Return best practices for memory management."""
        practices = [
            {
                'title': '及時釋放資源',
                'bad_practice': '# 忘記釋放攝影機資源',
                'good_practice': '''cap = cv2.VideoCapture(0)
try:
while True:
ret, frame = cap.read()
if not ret:
break
# 處理幀
finally:
cap.release()
cv2.destroyAllWindows()''',
                'reason': '確保資源被正確釋放,避免記憶體洩漏'
            },
            {
                'title': '使用適當的資料類型',
                'bad_practice': '# 對8-bit影像使用浮點運算',
                'good_practice': '''# 根據操作選擇資料類型
img_uint8 = image.astype(np.uint8) # 8-bit無符號整數
img_float32 = image.astype(np.float32) # 32-bit浮點數
img_float64 = image.astype(np.float64) # 64-bit浮點數
# 運算後轉換回適當類型
result = np.clip(img_float32 * 1.5, 0, 255).astype(np.uint8)''',
                'reason': '適當的資料類型可以節省記憶體並提高運算精度'
            },
            {
                'title': '避免不必要的複製',
                'bad_practice': '# 頻繁複製大型影像',
                'good_practice': '''# 只有在必要時才複製
img_copy = image.copy() # 深複製
# 使用視圖而不是複製
img_view = image[:100, :100] # 創建視圖,不複製資料
# 就地操作(修改原始影像)
cv2.rectangle(image, (10, 10), (100, 100), (255, 0, 0), 2)''',
                'reason': '減少記憶體分配和複製操作可以提高性能'
            }
        ]
        return practices

    @staticmethod
    def performance_best_practices():
        """Return best practices for performance."""
        practices = [
            {
                'title': '向量化操作',
                'bad_practice': '''for y in range(height):
for x in range(width):
image[y, x] = image[y, x] * 1.5''',
                'good_practice': '''# 使用NumPy向量化操作
image = np.clip(image.astype(np.float32) * 1.5, 0, 255).astype(np.uint8)
# 或使用OpenCV內置函數
image = cv2.convertScaleAbs(image, alpha=1.5, beta=0)''',
                'reason': '向量化操作比Python循環快數百倍'
            },
            {
                'title': '預分配記憶體',
                'bad_practice': '''results = []
for i in range(1000):
result = process_image(image)
results.append(result)''',
                'good_practice': '''# 預分配記憶體
results = np.empty((1000, height, width, 3), dtype=np.uint8)
for i in range(1000):
results[i] = process_image(image)''',
                'reason': '預分配避免反覆重新分配記憶體,提高性能'
            },
            {
                'title': '使用適當的演算法參數',
                'bad_practice': '''# 使用過高的解析度或質量
resized = cv2.resize(image, (4000, 3000))''',
                'good_practice': '''# 根據應用需求選擇適當參數
# 人臉檢測不需要高解析度
resized = cv2.resize(image, (640, 480))
# 調整檢測器參數平衡速度和準確性
faces = face_cascade.detectMultiScale(
gray,
scaleFactor=1.1, # 較小值更準確但更慢
minNeighbors=5, # 較大值減少誤報
minSize=(30, 30) # 最小人臉尺寸
)''',
                'reason': '適當的參數可以在保持質量的同時大幅提高性能'
            }
        ]
        return practices

    @staticmethod
    def code_quality_best_practices():
        """Return best practices for code quality."""
        practices = [
            {
                'title': '錯誤處理',
                'bad_practice': '# 忽略潛在的錯誤',
                'good_practice': '''try:
image = cv2.imread("image.jpg")
if image is None:
raise FileNotFoundError("無法載入影像")
# 影像處理操作
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
except FileNotFoundError as e:
print(f"文件錯誤: {e}")
# 恢復或退出
except cv2.error as e:
print(f"OpenCV錯誤: {e}")
# 處理OpenCV特定錯誤
except Exception as e:
print(f"未預期錯誤: {e}")
# 通用錯誤處理''',
                'reason': '適當的錯誤處理提高代碼健壯性和可維護性'
            },
            {
                'title': '代碼模組化',
                'bad_practice': '# 所有功能寫在一個大函數中',
                'good_practice': '''def load_image(path, mode=cv2.IMREAD_COLOR):
"""載入影像並檢查是否成功"""
image = cv2.imread(path, mode)
if image is None:
raise ValueError(f"無法載入影像: {path}")
return image
def preprocess_image(image, target_size=(224, 224)):
"""預處理影像:調整大小、歸一化等"""
resized = cv2.resize(image, target_size)
normalized = resized / 255.0
return normalized
def process_pipeline(image_path):
"""完整的處理流程"""
image = load_image(image_path)
processed = preprocess_image(image)
return processed''',
                'reason': '模組化代碼更易於測試、維護和重用'
            },
            {
                'title': '文檔和註釋',
                'bad_practice': '# 缺乏文檔和註釋',
                'good_practice': '''def detect_objects(image, confidence_threshold=0.5):
"""
使用YOLO模型檢測影像中的物件
參數:
image: 輸入影像 (BGR格式)
confidence_threshold: 置信度閾值,預設0.5
返回:
results: 檢測結果列表,每個元素為(標籤, 置信度, 邊界框)
"""
# 載入模型
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
# 準備輸入blob
blob = cv2.dnn.blobFromImage(image, 1/255.0, (416, 416),
swapRB=True, crop=False)
# ... 其餘代碼 ...
return results''',
                'reason': '良好的文檔和註釋提高代碼可讀性和可維護性'
            }
        ]
        return practices

    @staticmethod
    def display_all_best_practices():
        """Print every practice category with its examples."""
        categories = [
            ("影像I/O最佳實踐", OpenCVBestPractices.image_io_best_practices()),
            ("記憶體管理最佳實踐", OpenCVBestPractices.memory_management_best_practices()),
            ("性能最佳實踐", OpenCVBestPractices.performance_best_practices()),
            ("代碼質量最佳實踐", OpenCVBestPractices.code_quality_best_practices())
        ]
        for category_name, practices in categories:
            print(f"\n{'='*60}")
            print(category_name)
            print('='*60)
            for i, practice in enumerate(practices, 1):
                print(f"\n{i}. {practice['title']}")
                print(f" 原因: {practice['reason']}")
                print(f" 不良實踐:\n{practice['bad_practice']}")
                print(f" 最佳實踐:\n{practice['good_practice']}")


# Print the guide at import time, matching the tutorial's flow.
OpenCVBestPractices.display_all_best_practices()
11.2 常見問題與解決方案
python
# OpenCV frequently-asked questions and their solutions.
class OpenCVFAQ:
    """Static catalogue of common OpenCV problems.

    Each entry is a dict with 'question', 'cause' and 'solution' keys;
    solutions embed code samples as string literals printed verbatim.
    """

    @staticmethod
    def common_issues_and_solutions():
        """Return the list of FAQ entries."""
        faqs = [
            {
                'question': '影像載入返回None',
                'cause': '文件路徑錯誤、文件損壞或格式不受支援',
                'solution': '''1. 檢查文件路徑是否正確
2. 驗證文件是否存在且可讀
3. 確保影像格式受支援
4. 使用絕對路徑代替相對路徑
# 範例代碼:
import os
image_path = "image.jpg"
if not os.path.exists(image_path):
print(f"文件不存在: {image_path}")
else:
image = cv2.imread(image_path)
if image is None:
print(f"無法載入影像,可能是格式不受支援或文件損壞")
else:
print(f"成功載入影像,尺寸: {image.shape}")'''
            },
            {
                'question': '色彩顯示不正確',
                'cause': 'OpenCV使用BGR色彩空間,而其他庫通常使用RGB',
                'solution': '''# 將BGR轉換為RGB用於顯示
image_bgr = cv2.imread("image.jpg") # OpenCV載入為BGR
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
# 使用Matplotlib顯示
import matplotlib.pyplot as plt
plt.imshow(image_rgb)
plt.show()
# 或將RGB轉換為BGR用於OpenCV操作
image_rgb = plt.imread("image.jpg") # Matplotlib載入為RGB
image_bgr = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR)'''
            },
            {
                'question': '影片處理速度太慢',
                'cause': '每幀處理過於複雜、解析度過高或未使用優化',
                'solution': '''1. 降低影像解析度
2. 減少每幀的處理操作
3. 使用向量化操作代替循環
4. 啟用多線程或GPU加速
# 優化範例:
def optimized_video_processing():
cap = cv2.VideoCapture(0)
# 降低解析度
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
# 跳過一些幀以提高速度
frame_skip = 2
frame_count = 0
while True:
ret, frame = cap.read()
if not ret:
break
frame_count += 1
if frame_count % frame_skip != 0:
continue # 跳過此幀
# 使用高效的處理方法
# 例如,只在必要時進行色彩轉換
if need_gray:
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
# 顯示結果
cv2.imshow('Optimized Processing', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()'''
            },
            {
                'question': '記憶體使用過高',
                'cause': '未及時釋放資源、影像緩存過多或資料類型不當',
                'solution': '''1. 及時釋放攝影機和視窗資源
2. 使用適當的資料類型
3. 釋放不再需要的變數
4. 使用生成器處理大型影像集合
# 記憶體管理範例:
def memory_efficient_processing(image_paths):
for path in image_paths:
# 每次只載入一個影像
image = cv2.imread(path)
if image is None:
continue
# 處理影像
processed = process_image(image)
# 立即釋放原始影像
del image
yield processed
# 強制垃圾回收(在必要時)
import gc
gc.collect()'''
            },
            {
                'question': '檢測演算法準確性低',
                'cause': '參數設置不當、影像質量差或光照條件不佳',
                'solution': '''1. 調整檢測器參數
2. 預處理影像(去噪、增強對比度等)
3. 使用適當的色彩空間
4. 考慮光照條件
# 改進檢測準確性:
def improve_detection_accuracy(image):
# 1. 預處理影像
# 去噪
denoised = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
# 增強對比度
lab = cv2.cvtColor(denoised, cv2.COLOR_BGR2LAB)
l, a, b = cv2.split(lab)
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8))
l = clahe.apply(l)
enhanced = cv2.merge([l, a, b])
enhanced = cv2.cvtColor(enhanced, cv2.COLOR_LAB2BGR)
# 2. 調整檢測參數
gray = cv2.cvtColor(enhanced, cv2.COLOR_BGR2GRAY)
# 人臉檢測範例
face_cascade = cv2.CascadeClassifier(
cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
)
# 嘗試不同的參數組合
faces = face_cascade.detectMultiScale(
gray,
scaleFactor=1.05, # 較小值提高準確性
minNeighbors=6, # 較大值減少誤報
minSize=(50, 50), # 根據應用調整
flags=cv2.CASCADE_SCALE_IMAGE
)
return faces'''
            },
            {
                'question': '跨平台兼容性問題',
                'cause': '不同操作系統的差異、依賴庫版本問題或硬體差異',
                'solution': '''1. 使用虛擬環境管理依賴
2. 明確指定庫版本
3. 檢查操作系統特定問題
4. 使用條件代碼處理平台差異
# requirements.txt範例:
opencv-python==4.5.5.64
numpy==1.21.6
matplotlib==3.5.2
# 平台兼容性檢查:
import platform
import sys
def check_platform_compatibility():
system = platform.system()
print(f"操作系統: {system}")
print(f"Python版本: {sys.version}")
print(f"OpenCV版本: {cv2.__version__}")
if system == "Windows":
# Windows特定設置
print("運行在Windows系統上")
elif system == "Linux":
# Linux特定設置
print("運行在Linux系統上")
elif system == "Darwin":
# macOS特定設置
print("運行在macOS系統上")
# 檢查必要的功能
if not cv2.ocl.haveOpenCL():
print("警告: OpenCL不可用,某些加速功能可能受限")
return system'''
            }
        ]
        return faqs

    @staticmethod
    def display_faq():
        """Print every FAQ entry with its cause and solution."""
        faqs = OpenCVFAQ.common_issues_and_solutions()
        print("\n" + "="*80)
        print("OpenCV常見問題與解決方案")
        print("="*80)
        for i, faq in enumerate(faqs, 1):
            print(f"\n{i}. 問題: {faq['question']}")
            print(f" 原因: {faq['cause']}")
            print(f" 解決方案:")
            print(f"{faq['solution']}")
            print("-"*80)


# Print the FAQ at import time, matching the tutorial's flow.
OpenCVFAQ.display_faq()
結論
OpenCV作為一個功能強大的電腦視覺庫,從基礎的影像處理到先進的深度學習應用,提供了完整的解決方案。通過本指南,我們從入門基礎開始,逐步深入到高級應用,涵蓋了:
-
基礎操作:影像讀取、顯示、色彩空間轉換、基本變換
-
進階技巧:濾波、形態學操作、特徵檢測、影像分割
-
高級應用:物件檢測、追蹤、深度學習整合、AR應用
-
性能優化:向量化操作、記憶體管理、平行處理、GPU加速
-
最佳實踐:代碼質量、錯誤處理、跨平台兼容性
未來發展方向
隨著電腦視覺技術的不斷發展,OpenCV也在持續進化。未來的發展方向可能包括:
-
深度學習集成:更好的深度學習模型支援和優化
-
邊緣計算:針對物聯網和移動設備的優化
-
3D視覺:增強的三維重建和點雲處理能力
-
實時性能:更高效的演算法和硬體加速
學習資源建議
-
官方文檔:OpenCV官方文檔是最權威的學習資源
-
GitHub倉庫:查看官方示例和社區貢獻
-
學術論文:了解演算法原理和最新研究
-
實踐專案:通過實際專案加深理解
-
社區論壇:Stack Overflow、OpenCV論壇等
電腦視覺是一個快速發展的領域,持續學習和實踐是掌握這項技術的關鍵。無論是學術研究還是工業應用,OpenCV都將是您強大的工具。祝您在電腦視覺的旅程中取得成功!
更多推荐

所有评论(0)