OpenCV:從像素到智慧視覺系統的全面指南

第一部分:OpenCV入門基礎

第1章 OpenCV簡介與發展歷程

1.1 什麼是OpenCV?

OpenCV(Open Source Computer Vision Library)是一個開源的電腦視覺和機器學習軟體庫,由Intel於1999年首次發布。這個跨平台庫包含了超過2500個優化演算法,涵蓋了電腦視覺的各個領域:人臉識別、物件檢測、影像分割、運動追蹤、增強現實等。如今,OpenCV已成為學術界和工業界最廣泛使用的電腦視覺庫之一,每天有數百萬次下載,被應用於從手機App到自動駕駛系統的各個領域。

1.2 OpenCV的核心優勢

  • 開源免費:BSD許可證允許商業和研究使用

  • 跨平台:支援Windows、Linux、macOS、iOS、Android

  • 多語言綁定:原生C++,並有Python、Java、C#等接口

  • 高效能:底層使用C/C++編寫,並針對Intel處理器優化

  • 豐富的功能:從基礎影像處理到深度學習模型部署

  • 活躍的社群:龐大的開發者社群和豐富的文檔資源

1.3 OpenCV的發展歷程

  • 1999年:Intel研究院啟動了OpenCV項目

  • 2000年:發布第一個alpha版本

  • 2006年:OpenCV 1.0發布

  • 2009年:OpenCV 2.0發布,引入了C++接口

  • 2015年:OpenCV 3.0發布,模組化設計

  • 2018年:OpenCV 4.0發布,專注於深度學習和性能優化

  • 2020年至今:OpenCV 5.x開發中,持續加入新演算法和優化

第2章 OpenCV環境配置

2.1 Python環境安裝

bash

# 使用pip安裝OpenCV(完整版)
pip install opencv-python  # 基礎模組
pip install opencv-contrib-python  # 包含額外模組

python

# 驗證安裝
import cv2
print(f"OpenCV版本: {cv2.__version__}")

# 檢查是否安裝正確
import numpy as np
print(f"NumPy版本: {np.__version__}")

2.2 C++環境配置(Linux)

bash

# Install build tooling and the distro-packaged OpenCV on Ubuntu/Debian.
sudo apt update
sudo apt install build-essential cmake git
sudo apt install libopencv-dev python3-opencv

# Alternatively, compile and install the latest version from source.
# Both repos are cloned side by side, so the contrib modules path below
# (../../opencv_contrib/modules, relative to opencv/build) resolves.
git clone https://github.com/opencv/opencv.git
git clone https://github.com/opencv/opencv_contrib.git

cd opencv
mkdir build && cd build
cmake -D CMAKE_BUILD_TYPE=RELEASE \
      -D CMAKE_INSTALL_PREFIX=/usr/local \
      -D OPENCV_EXTRA_MODULES_PATH=../../opencv_contrib/modules \
      -D WITH_CUDA=OFF \
      -D ENABLE_CXX11=ON \
      -D BUILD_EXAMPLES=ON ..

make -j$(nproc)
sudo make install

2.3 基本影像讀取與顯示

python

import cv2
import numpy as np
import matplotlib.pyplot as plt

# Method 1: read an image with OpenCV (channels arrive in BGR order).
img = cv2.imread('image.jpg')  # NOTE: returns None if the file is missing

# Method 2: display with Matplotlib (expects RGB, so convert first).
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img_rgb)
plt.axis('off')
plt.show()

# Method 3: display in OpenCV's own window.
cv2.imshow('Image', img)
cv2.waitKey(0)  # block until any key is pressed
cv2.destroyAllWindows()

# Basic image attributes.
print(f"影像形狀: {img.shape}")  # (height, width, channels)
print(f"影像大小: {img.size}")    # total number of array elements
print(f"資料類型: {img.dtype}")   # element dtype (uint8 for imread)

第3章 影像處理基礎

3.1 色彩空間轉換

python

# Colour-space conversion examples.
img = cv2.imread('image.jpg')

# BGR to grayscale.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# BGR to HSV (hue, saturation, value).
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

# BGR to LAB (lightness, green-red axis, blue-yellow axis).
lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)

# BGR to YCrCb (luma, red-difference chroma, blue-difference chroma).
ycrb = cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb)

# Split into individual channels and merge them back.
b, g, r = cv2.split(img)
merged = cv2.merge([b, g, r])

# Display every colour space side by side.
plt.figure(figsize=(15, 8))
plt.subplot(2, 3, 1), plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)), plt.title('原始影像')
plt.subplot(2, 3, 2), plt.imshow(gray, cmap='gray'), plt.title('灰度圖')
plt.subplot(2, 3, 3), plt.imshow(cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)), plt.title('HSV')
plt.subplot(2, 3, 4), plt.imshow(cv2.cvtColor(lab, cv2.COLOR_LAB2RGB)), plt.title('LAB')
plt.subplot(2, 3, 5), plt.imshow(cv2.cvtColor(ycrb, cv2.COLOR_YCrCb2RGB)), plt.title('YCrCb')
plt.subplot(2, 3, 6), plt.imshow(cv2.cvtColor(merged, cv2.COLOR_BGR2RGB)), plt.title('合併通道')
plt.tight_layout()
plt.show()

3.2 影像基本操作

python

# Geometric transformations.
# 1. Scaling.
height, width = img.shape[:2]
resized = cv2.resize(img, (width//2, height//2))  # shrink to half size

# Aspect-ratio-preserving resize.
scale_percent = 50  # scale factor in percent
new_width = int(width * scale_percent / 100)
new_height = int(height * scale_percent / 100)
dim = (new_width, new_height)
resized_proportional = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)

# 2. Rotation about the image centre.
(h, w) = img.shape[:2]
center = (w // 2, h // 2)
M = cv2.getRotationMatrix2D(center, 45, 1.0)  # rotate 45 degrees, scale 1.0
rotated = cv2.warpAffine(img, M, (w, h))

# 3. Translation.
M = np.float32([[1, 0, 100], [0, 1, 50]])  # shift 100 px in x, 50 px in y
translated = cv2.warpAffine(img, M, (w, h))

# 4. Affine transform from three point correspondences.
pts1 = np.float32([[50, 50], [200, 50], [50, 200]])
pts2 = np.float32([[10, 100], [200, 50], [100, 250]])
M_affine = cv2.getAffineTransform(pts1, pts2)
affine = cv2.warpAffine(img, M_affine, (w, h))

# 5. Perspective transform from four point correspondences.
pts1 = np.float32([[56, 65], [368, 52], [28, 387], [389, 390]])
pts2 = np.float32([[0, 0], [300, 0], [0, 300], [300, 300]])
M_perspective = cv2.getPerspectiveTransform(pts1, pts2)
perspective = cv2.warpPerspective(img, M_perspective, (300, 300))

# Show every transformation result.
transformations = [
    ("原始影像", img),
    ("縮放", resized_proportional),
    ("旋轉45度", rotated),
    ("平移", translated),
    ("仿射變換", affine),
    ("透視變換", perspective)
]

plt.figure(figsize=(15, 10))
for i, (title, img_t) in enumerate(transformations, 1):
    plt.subplot(2, 3, i)
    plt.imshow(cv2.cvtColor(img_t, cv2.COLOR_BGR2RGB))
    plt.title(title)
    plt.axis('off')
plt.tight_layout()
plt.show()

3.3 影像閾值處理

python

# Load the image directly as grayscale.
gray_img = cv2.imread('image.jpg', cv2.IMREAD_GRAYSCALE)

# 1. Global (fixed-threshold) binarisation, five variants.
ret, thresh1 = cv2.threshold(gray_img, 127, 255, cv2.THRESH_BINARY)
ret, thresh2 = cv2.threshold(gray_img, 127, 255, cv2.THRESH_BINARY_INV)
ret, thresh3 = cv2.threshold(gray_img, 127, 255, cv2.THRESH_TRUNC)
ret, thresh4 = cv2.threshold(gray_img, 127, 255, cv2.THRESH_TOZERO)
ret, thresh5 = cv2.threshold(gray_img, 127, 255, cv2.THRESH_TOZERO_INV)

# 2. Adaptive thresholding: the threshold is computed per pixel from an
# 11x11 neighbourhood, minus the constant 2.
thresh6 = cv2.adaptiveThreshold(gray_img, 255, 
                                 cv2.ADAPTIVE_THRESH_MEAN_C,
                                 cv2.THRESH_BINARY, 11, 2)

thresh7 = cv2.adaptiveThreshold(gray_img, 255,
                                 cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                 cv2.THRESH_BINARY, 11, 2)

# 3. Otsu's method: the threshold argument (0) is ignored and the
# optimal global threshold is found automatically.
ret, thresh8 = cv2.threshold(gray_img, 0, 255, 
                              cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# Display all thresholding results in a 3x3 grid.
titles = ['Original', 'BINARY', 'BINARY_INV', 'TRUNC', 
          'TOZERO', 'TOZERO_INV', 'ADAPTIVE_MEAN', 
          'ADAPTIVE_GAUSSIAN', "OTSU"]
images = [gray_img, thresh1, thresh2, thresh3, thresh4, 
          thresh5, thresh6, thresh7, thresh8]

plt.figure(figsize=(15, 10))
for i in range(9):
    plt.subplot(3, 3, i+1)
    plt.imshow(images[i], 'gray')
    plt.title(titles[i])
    plt.axis('off')
plt.tight_layout()
plt.show()

第二部分:OpenCV進階技巧

第4章 影像濾波與增強

4.1 卷積與濾波器

python

# Custom convolution kernels.
img = cv2.imread('image.jpg')

# Identity kernel: output equals input (sanity check).
kernel_identity = np.array([[0, 0, 0],
                            [0, 1, 0],
                            [0, 0, 0]])

# Sharpening kernel: centre boosted, 4-neighbours subtracted.
kernel_sharpen = np.array([[0, -1, 0],
                           [-1, 5, -1],
                           [0, -1, 0]])

# 5x5 box (mean) blur kernel.
kernel_blur = np.ones((5, 5), np.float32) / 25

# Edge-detection kernel (8-neighbour discrete Laplacian).
kernel_edge_detect = np.array([[-1, -1, -1],
                               [-1, 8, -1],
                               [-1, -1, -1]])

# Apply each kernel; ddepth=-1 keeps the source image depth.
filtered_identity = cv2.filter2D(img, -1, kernel_identity)
filtered_sharpen = cv2.filter2D(img, -1, kernel_sharpen)
filtered_blur = cv2.filter2D(img, -1, kernel_blur)
filtered_edge = cv2.filter2D(img, -1, kernel_edge_detect)

# Gaussian blur (5x5 window; sigma=0 means "derive from kernel size").
gaussian_blur = cv2.GaussianBlur(img, (5, 5), 0)

# 中值濾波器(去除椒鹽噪聲)
# 首先添加噪聲
def add_salt_pepper_noise(image, prob):
    """Return a copy of *image* corrupted with salt-and-pepper noise.

    Parameters
    ----------
    image : np.ndarray
        Input image, 2-D grayscale (H, W) or colour (H, W, C).
    prob : float
        Approximate fraction of pixels to corrupt; roughly half become
        white (salt, 255) and half black (pepper, 0).

    Returns
    -------
    np.ndarray
        Noisy copy; the input array is not modified.

    Fixes over the original: ``randint(0, i-1)`` could never hit the last
    row/column (``high`` is exclusive); the explicit ``[..., :]`` channel
    index crashed on 2-D grayscale input; and the noise count was based
    on the raw element count (including channels) instead of pixels.
    """
    output = np.copy(image)
    h, w = image.shape[:2]
    # Number of corrupted pixels per noise type, based on pixel count.
    count = int(np.ceil(prob * h * w * 0.5))

    # Salt noise: random pixels set to white (all channels when 3-D).
    rows = np.random.randint(0, h, count)
    cols = np.random.randint(0, w, count)
    output[rows, cols] = 255

    # Pepper noise: random pixels set to black.
    rows = np.random.randint(0, h, count)
    cols = np.random.randint(0, w, count)
    output[rows, cols] = 0
    return output

# Median filter (removes the salt-and-pepper noise added above).
noisy_img = add_salt_pepper_noise(img, 0.01)
median_blur = cv2.medianBlur(noisy_img, 5)

# Bilateral filter (edge-preserving smoothing).
bilateral_filter = cv2.bilateralFilter(img, 9, 75, 75)

# Display every filtering result.
filters = [
    ("原始影像", img),
    ("銳化", filtered_sharpen),
    ("均值模糊", filtered_blur),
    ("邊緣檢測", filtered_edge),
    ("高斯模糊", gaussian_blur),
    ("椒鹽噪聲", noisy_img),
    ("中值濾波", median_blur),
    ("雙邊濾波", bilateral_filter)
]

plt.figure(figsize=(15, 10))
for i, (title, img_f) in enumerate(filters, 1):
    plt.subplot(2, 4, i)
    plt.imshow(cv2.cvtColor(img_f, cv2.COLOR_BGR2RGB))
    plt.title(title)
    plt.axis('off')
plt.tight_layout()
plt.show()

4.2 形態學操作

python

# Morphological operations demo.
img_gray = cv2.imread('text_image.jpg', cv2.IMREAD_GRAYSCALE)
_, binary_img = cv2.threshold(img_gray, 127, 255, cv2.THRESH_BINARY)

# Structuring elements of three shapes.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
kernel_ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
kernel_cross = cv2.getStructuringElement(cv2.MORPH_CROSS, (5, 5))

# Erosion (eats away boundary pixels; foreground regions shrink).
erosion_rect = cv2.erode(binary_img, kernel, iterations=1)
erosion_ellipse = cv2.erode(binary_img, kernel_ellipse, iterations=1)
erosion_cross = cv2.erode(binary_img, kernel_cross, iterations=1)

# Dilation (grows boundaries outward; foreground regions expand).
dilation_rect = cv2.dilate(binary_img, kernel, iterations=1)
dilation_ellipse = cv2.dilate(binary_img, kernel_ellipse, iterations=1)
dilation_cross = cv2.dilate(binary_img, kernel_cross, iterations=1)

# Opening (erode then dilate: removes small foreground specks).
opening = cv2.morphologyEx(binary_img, cv2.MORPH_OPEN, kernel)

# Closing (dilate then erode: fills small holes in the foreground).
closing = cv2.morphologyEx(binary_img, cv2.MORPH_CLOSE, kernel)

# Morphological gradient (dilation minus erosion: object outlines).
gradient = cv2.morphologyEx(binary_img, cv2.MORPH_GRADIENT, kernel)

# Top-hat (source minus opening: small bright details).
tophat = cv2.morphologyEx(binary_img, cv2.MORPH_TOPHAT, kernel)

# Black-hat (closing minus source: small dark details).
blackhat = cv2.morphologyEx(binary_img, cv2.MORPH_BLACKHAT, kernel)

# Display all morphological results.
morph_ops = [
    ("原始二值影像", binary_img),
    ("腐蝕(RECT)", erosion_rect),
    ("腐蝕(ELLIPSE)", erosion_ellipse),
    ("腐蝕(CROSS)", erosion_cross),
    ("膨脹(RECT)", dilation_rect),
    ("膨脹(ELLIPSE)", dilation_ellipse),
    ("膨脹(CROSS)", dilation_cross),
    ("開運算", opening),
    ("閉運算", closing),
    ("形態學梯度", gradient),
    ("頂帽運算", tophat),
    ("黑帽運算", blackhat)
]

plt.figure(figsize=(15, 12))
for i, (title, img_m) in enumerate(morph_ops, 1):
    plt.subplot(4, 3, i)
    plt.imshow(img_m, cmap='gray')
    plt.title(title)
    plt.axis('off')
plt.tight_layout()
plt.show()

4.3 影像金字塔

python

# Image pyramids.
img = cv2.imread('image.jpg')

# Gaussian pyramid: repeated blur + 2x downsample (7 levels total).
layer = img.copy()
gaussian_pyramid = [layer]
for i in range(6):
    layer = cv2.pyrDown(layer)  # downsample by 2 in each dimension
    gaussian_pyramid.append(layer)

# Laplacian pyramid: each band is a Gaussian level minus the upsampled
# next-smaller level, i.e. the detail lost by downsampling.
layer = gaussian_pyramid[5]
laplacian_pyramid = [layer]
for i in range(5, 0, -1):
    size = (gaussian_pyramid[i-1].shape[1], gaussian_pyramid[i-1].shape[0])
    gaussian_expanded = cv2.pyrUp(gaussian_pyramid[i], dstsize=size)
    laplacian = cv2.subtract(gaussian_pyramid[i-1], gaussian_expanded)
    laplacian_pyramid.append(laplacian)

# Multi-band (pyramid) image blending.
def blend_images(img1, img2, mask):
    """Blend two equally-sized images with a Laplacian-pyramid blend.

    Parameters
    ----------
    img1, img2 : np.ndarray
        BGR images of identical shape.
    mask : np.ndarray
        Float mask in [0, 1]; 1.0 selects *img1*, 0.0 selects *img2*.
        May be single-channel (broadcast over colour channels).

    Returns
    -------
    np.ndarray
        The blended full-resolution image (float-valued; cast to uint8
        before display).
    """
    levels = 6

    # Gaussian pyramids for both images and the mask (7 levels, 0..6).
    G1, G2, GM = img1.copy(), img2.copy(), mask.copy()
    gp1, gp2, gpM = [G1], [G2], [GM]
    for _ in range(levels):
        G1 = cv2.pyrDown(G1)
        G2 = cv2.pyrDown(G2)
        GM = cv2.pyrDown(GM)
        gp1.append(G1)
        gp2.append(G2)
        gpM.append(GM)

    # Laplacian pyramids, smallest level first: lp[0] is the level-5
    # Gaussian base, lp[k] the detail band at level 5-k.
    lp1 = [gp1[5]]
    lp2 = [gp2[5]]
    for i in range(5, 0, -1):
        size = (gp1[i-1].shape[1], gp1[i-1].shape[0])
        lp1.append(cv2.subtract(gp1[i-1], cv2.pyrUp(gp1[i], dstsize=size)))
        lp2.append(cv2.subtract(gp2[i-1], cv2.pyrUp(gp2[i], dstsize=size)))

    # Blend each band with the mask of the MATCHING pyramid level.
    # BUG FIX: the original zipped with reversed(gpM), i.e. mask levels
    # 6..1, while the bands run over levels 5..0 — every pairing was one
    # pyramid level off and the sizes never matched. gpM[5::-1] gives
    # mask levels 5..0 in the same order as the bands.
    LS = []
    for l1, l2, gm in zip(lp1, lp2, gpM[5::-1]):
        if len(gm.shape) == 2:
            gm = gm[:, :, np.newaxis]  # broadcast 1-channel mask over colour
        LS.append(l1 * gm + l2 * (1.0 - gm))

    # Collapse the blended pyramid back to full resolution.
    blended = LS[0]
    for i in range(1, levels):
        size = (LS[i].shape[1], LS[i].shape[0])
        blended = cv2.pyrUp(blended, dstsize=size)
        blended = cv2.add(blended, LS[i])

    return blended

# Load two images and build a blend mask.
img1 = cv2.imread('image1.jpg')
img2 = cv2.imread('image2.jpg')
img1 = cv2.resize(img1, (500, 500))
img2 = cv2.resize(img2, (500, 500))

# Gradient mask: 1.0 on the left half (selects img1), 0.0 on the right
# (selects img2), softened by a large Gaussian so the seam is gradual.
mask = np.zeros((500, 500), dtype=np.float32)
mask[:, :250] = 1.0  # left half = img1, right half = img2
mask = cv2.GaussianBlur(mask, (51, 51), 0)

# Blend the images.
blended = blend_images(img1, img2, mask)

# Show pyramid levels and the blending result.
plt.figure(figsize=(15, 10))

plt.subplot(2, 4, 1)
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.title('原始影像')
plt.axis('off')

for i in range(3):
    plt.subplot(2, 4, i+2)
    plt.imshow(cv2.cvtColor(gaussian_pyramid[i], cv2.COLOR_BGR2RGB))
    plt.title(f'高斯金字塔層 {i}')
    plt.axis('off')

plt.subplot(2, 4, 5)
plt.imshow(cv2.cvtColor(img1, cv2.COLOR_BGR2RGB))
plt.title('影像1')
plt.axis('off')

plt.subplot(2, 4, 6)
plt.imshow(cv2.cvtColor(img2, cv2.COLOR_BGR2RGB))
plt.title('影像2')
plt.axis('off')

plt.subplot(2, 4, 7)
plt.imshow(mask, cmap='gray')
plt.title('混合遮罩')
plt.axis('off')

plt.subplot(2, 4, 8)
plt.imshow(cv2.cvtColor(blended.astype(np.uint8), cv2.COLOR_BGR2RGB))
plt.title('金字塔混合結果')
plt.axis('off')

plt.tight_layout()
plt.show()

第5章 影像特徵檢測與描述

5.1 邊緣檢測

python

# Edge-detection algorithm comparison.
img_gray = cv2.imread('image.jpg', cv2.IMREAD_GRAYSCALE)

# 1. Sobel operator (first derivative per axis; CV_64F keeps the sign).
sobel_x = cv2.Sobel(img_gray, cv2.CV_64F, 1, 0, ksize=5)
sobel_y = cv2.Sobel(img_gray, cv2.CV_64F, 0, 1, ksize=5)
sobel_combined = cv2.magnitude(sobel_x, sobel_y)

# 2. Scharr operator (more accurate 3x3 derivative kernel than Sobel).
scharr_x = cv2.Scharr(img_gray, cv2.CV_64F, 1, 0)
scharr_y = cv2.Scharr(img_gray, cv2.CV_64F, 0, 1)
scharr_combined = cv2.magnitude(scharr_x, scharr_y)

# 3. Laplacian operator (second derivative).
laplacian = cv2.Laplacian(img_gray, cv2.CV_64F)

# 4. Canny edge detector (most widely used in practice).
canny_edges = cv2.Canny(img_gray, 100, 200)  # hysteresis thresholds, tunable

# Canny edge detection with automatically derived thresholds.
def auto_canny(image, sigma=0.33):
    """Run Canny with hysteresis thresholds derived from the image.

    The lower/upper thresholds are placed a fraction *sigma* below and
    above the median pixel intensity, clamped to the valid [0, 255]
    range, so one call adapts to bright and dark images alike.
    """
    median_intensity = np.median(image)
    lower = int(max(0, (1.0 - sigma) * median_intensity))
    upper = int(min(255, (1.0 + sigma) * median_intensity))
    return cv2.Canny(image, lower, upper)

auto_canny_edges = auto_canny(img_gray)

# Display every edge-detection result (signed 64-bit responses are
# converted to displayable 8-bit absolute values first).
edge_detectors = [
    ("原始灰度圖", img_gray),
    ("Sobel X", cv2.convertScaleAbs(sobel_x)),
    ("Sobel Y", cv2.convertScaleAbs(sobel_y)),
    ("Sobel Combined", cv2.convertScaleAbs(sobel_combined)),
    ("Scharr X", cv2.convertScaleAbs(scharr_x)),
    ("Scharr Y", cv2.convertScaleAbs(scharr_y)),
    ("Scharr Combined", cv2.convertScaleAbs(scharr_combined)),
    ("Laplacian", cv2.convertScaleAbs(laplacian)),
    ("Canny (100,200)", canny_edges),
    ("Auto Canny", auto_canny_edges)
]

plt.figure(figsize=(15, 12))
for i, (title, img_e) in enumerate(edge_detectors, 1):
    plt.subplot(4, 3, i)
    plt.imshow(img_e, cmap='gray')
    plt.title(title)
    plt.axis('off')
plt.tight_layout()
plt.show()

5.2 角點檢測

python

# Corner-detection algorithms.
img = cv2.imread('chessboard.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# 1. Harris corner detection (requires a float32 input).
gray_float = np.float32(gray)
harris_response = cv2.cornerHarris(gray_float, blockSize=2, ksize=3, k=0.04)

# Mark every pixel whose response exceeds 1% of the maximum in red.
img_harris = img.copy()
img_harris[harris_response > 0.01 * harris_response.max()] = [0, 0, 255]

# 2. Shi-Tomasi corner detection (improved Harris scoring).
corners = cv2.goodFeaturesToTrack(gray, maxCorners=100, 
                                  qualityLevel=0.01, 
                                  minDistance=10)
# BUG FIX: np.int0 was an alias removed in NumPy 2.0; a plain integer
# cast is equivalent here.
corners = corners.astype(int)

img_shitomasi = img.copy()
for corner in corners:
    x, y = corner.ravel()
    cv2.circle(img_shitomasi, (x, y), 3, (0, 255, 0), -1)

# 3. FAST corner detection.
fast = cv2.FastFeatureDetector_create()
fast.setNonmaxSuppression(True)
fast_keypoints = fast.detect(gray, None)

img_fast = img.copy()
img_fast = cv2.drawKeypoints(img, fast_keypoints, None, 
                             color=(255, 0, 0))

# 4. ORB feature detection (FAST keypoints + BRIEF descriptors).
orb = cv2.ORB_create(nfeatures=1000)
orb_keypoints, orb_descriptors = orb.detectAndCompute(gray, None)

img_orb = img.copy()
img_orb = cv2.drawKeypoints(img, orb_keypoints, None, 
                            color=(0, 255, 255), 
                            flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

# 5. SIFT feature detection (may be unavailable in some builds).
try:
    sift = cv2.SIFT_create()
    sift_keypoints, sift_descriptors = sift.detectAndCompute(gray, None)

    img_sift = img.copy()
    img_sift = cv2.drawKeypoints(img, sift_keypoints, None, 
                                 color=(255, 0, 255),
                                 flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
except (AttributeError, cv2.error):
    # Narrowed from a bare `except:`, which would also swallow
    # KeyboardInterrupt/SystemExit; only "SIFT missing or failed" is the
    # expected, recoverable condition here.
    img_sift = img.copy()
    cv2.putText(img_sift, "SIFT not available", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

# Display every detector's output.
corner_detectors = [
    ("Harris角點檢測", img_harris),
    ("Shi-Tomasi角點檢測", img_shitomasi),
    ("FAST角點檢測", img_fast),
    ("ORB特徵檢測", img_orb),
    ("SIFT特徵檢測", img_sift)
]

plt.figure(figsize=(15, 8))
for i, (title, img_c) in enumerate(corner_detectors, 1):
    plt.subplot(2, 3, i)
    plt.imshow(cv2.cvtColor(img_c, cv2.COLOR_BGR2RGB))
    plt.title(title)
    plt.axis('off')
plt.tight_layout()
plt.show()

5.3 特徵匹配

python

# Feature-matching example.
img1 = cv2.imread('object.jpg', cv2.IMREAD_GRAYSCALE)
img2 = cv2.imread('scene.jpg', cv2.IMREAD_GRAYSCALE)

# ORB detector (binary descriptors, matched with Hamming distance).
orb = cv2.ORB_create(nfeatures=1000)

# Keypoints and descriptors for both images.
kp1, des1 = orb.detectAndCompute(img1, None)
kp2, des2 = orb.detectAndCompute(img2, None)

# Brute-force matcher; crossCheck keeps only mutual best matches.
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

# Match descriptors.
matches = bf.match(des1, des2)

# Strongest matches (smallest distance) first.
matches = sorted(matches, key=lambda x: x.distance)

# Draw the best 50 matches.
img_matches = cv2.drawMatches(img1, kp1, img2, kp2, 
                              matches[:50], None, 
                              flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)

# FLANN matcher (suited to float descriptors such as SIFT/SURF).
try:
    # SIFT detector (may be unavailable in some builds).
    sift = cv2.SIFT_create()

    kp1_sift, des1_sift = sift.detectAndCompute(img1, None)
    kp2_sift, des2_sift = sift.detectAndCompute(img2, None)

    # FLANN parameters: randomized KD-trees, 50 leaf checks per query.
    FLANN_INDEX_KDTREE = 1
    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
    search_params = dict(checks=50)

    flann = cv2.FlannBasedMatcher(index_params, search_params)

    # Two nearest neighbours per descriptor, needed for the ratio test.
    matches_flann = flann.knnMatch(des1_sift, des2_sift, k=2)

    # Lowe's ratio test: keep a match only when it is clearly better
    # than the runner-up.
    good_matches = []
    for m, n in matches_flann:
        if m.distance < 0.7 * n.distance:
            good_matches.append(m)

    img_flann_matches = cv2.drawMatches(img1, kp1_sift, img2, kp2_sift,
                                        good_matches[:50], None,
                                        flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
except (AttributeError, cv2.error):
    # BUG FIX: narrowed from a bare `except:`, which silently swallowed
    # every exception (including KeyboardInterrupt); only the expected
    # "SIFT missing or matching failed" cases are handled.
    img_flann_matches = np.zeros_like(img_matches)

# Display the matching results.
plt.figure(figsize=(15, 8))

plt.subplot(1, 3, 1)
plt.imshow(img1, cmap='gray')
plt.title('查詢影像')
plt.axis('off')

plt.subplot(1, 3, 2)
plt.imshow(img_matches, cmap='gray')
plt.title('ORB暴力匹配 (前50個匹配)')
plt.axis('off')

plt.subplot(1, 3, 3)
plt.imshow(img_flann_matches, cmap='gray')
plt.title('SIFT FLANN匹配 (Lowe\'s比率測試)')
plt.axis('off')

plt.tight_layout()
plt.show()

第6章 影像分割與輪廓分析

6.1 影像分割

python

# Image-segmentation methods.
img = cv2.imread('objects.jpg')

# 1. Threshold-based segmentation.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, thresh_binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

# 2. Edge-based segmentation.
edges = cv2.Canny(gray, 100, 200)

# 3. Region-based segmentation (watershed algorithm).
def watershed_segmentation(image):
    """Separate touching objects with the marker-based watershed.

    The input image is modified in place: pixels on watershed boundaries
    are painted blue ([255, 0, 0] in BGR). Returns the annotated image
    and the marker label map produced by cv2.watershed.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Otsu binarisation, inverted so objects become white.
    _, binarized = cv2.threshold(grayscale, 0, 255,
                                 cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Morphological opening removes small speckle noise.
    struct = np.ones((3, 3), np.uint8)
    cleaned = cv2.morphologyEx(binarized, cv2.MORPH_OPEN, struct, iterations=2)

    # Dilating the cleaned mask yields pixels that are surely background.
    certain_bg = cv2.dilate(cleaned, struct, iterations=3)

    # Pixels far from any boundary (distance transform) are surely
    # foreground.
    distances = cv2.distanceTransform(cleaned, cv2.DIST_L2, 5)
    _, certain_fg = cv2.threshold(distances, 0.7 * distances.max(),
                                  255, 0)

    # Everything between the two certainties is yet to be decided.
    certain_fg = np.uint8(certain_fg)
    undecided = cv2.subtract(certain_bg, certain_fg)

    # Label connected foreground components, then shift by one so the
    # background is 1 — watershed reserves label 0 for "unknown".
    _, markers = cv2.connectedComponents(certain_fg)
    markers = markers + 1
    markers[undecided == 255] = 0

    # Flood from the markers; boundary pixels come back labelled -1.
    markers = cv2.watershed(image, markers)
    image[markers == -1] = [255, 0, 0]

    return image, markers

watershed_result, markers = watershed_segmentation(img.copy())

# 4. GrabCut segmentation (interactive foreground extraction).
def grabcut_segmentation(image, rect=None):
    """Extract the foreground with GrabCut initialised from a rectangle.

    When *rect* is None, a rectangle inset 50 pixels from every border
    is used. Returns the image with (probable-)background pixels zeroed.
    """
    seg_mask = np.zeros(image.shape[:2], np.uint8)

    if rect is None:
        # Default ROI: the whole frame minus a 50-pixel margin.
        rect = (50, 50, image.shape[1]-100, image.shape[0]-100)

    # Internal GMM state arrays required by the GrabCut API (1x65 each).
    background_model = np.zeros((1, 65), np.float64)
    foreground_model = np.zeros((1, 65), np.float64)

    # Five GrabCut iterations, seeded from the rectangle.
    cv2.grabCut(image, seg_mask, rect, background_model, foreground_model,
                5, cv2.GC_INIT_WITH_RECT)

    # Labels 0 (sure background) and 2 (probable background) are dropped;
    # everything else is kept.
    keep = np.where((seg_mask == 2) | (seg_mask == 0), 0, 1).astype('uint8')
    return image * keep[:, :, np.newaxis]

grabcut_result = grabcut_segmentation(img.copy())

# 5. K-means colour segmentation.
def kmeans_segmentation(image, k=3):
    """Quantise an image's colours into *k* clusters with cv2.kmeans."""
    # Flatten to an (N, 3) float32 sample matrix, one row per pixel.
    samples = np.float32(image.reshape((-1, 3)))

    # Stop after 100 iterations or once centres move less than 0.2.
    stop_criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER,
                     100, 0.2)

    # Ten random restarts; the best clustering is returned.
    _, labels, centers = cv2.kmeans(samples, k, None, stop_criteria,
                                    10, cv2.KMEANS_RANDOM_CENTERS)

    # Replace every pixel with its cluster-centre colour and restore the
    # original image shape.
    centers = np.uint8(centers)
    quantised = centers[labels.flatten()]
    return quantised.reshape(image.shape)

kmeans_result = kmeans_segmentation(img, k=4)

# Display segmentation results; single-channel results are converted to
# BGR so every panel goes through the same BGR->RGB conversion below.
segmentation_methods = [
    ("原始影像", img),
    ("閾值分割", cv2.cvtColor(thresh_binary, cv2.COLOR_GRAY2BGR)),
    ("邊緣分割", cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)),
    ("分水嶺演算法", watershed_result),
    ("GrabCut分割", grabcut_result),
    ("K-means色彩分割", kmeans_result)
]

plt.figure(figsize=(15, 10))
for i, (title, img_s) in enumerate(segmentation_methods, 1):
    plt.subplot(2, 3, i)
    plt.imshow(cv2.cvtColor(img_s, cv2.COLOR_BGR2RGB))
    plt.title(title)
    plt.axis('off')
plt.tight_layout()
plt.show()

6.2 輪廓檢測與分析

python

# Contour detection and analysis.
img = cv2.imread('shapes.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Binarise, inverted: shapes darker than the near-white background
# become white foreground.
_, binary = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY_INV)

# Find contours with the full nesting hierarchy and compressed chains.
contours, hierarchy = cv2.findContours(binary, cv2.RETR_TREE, 
                                       cv2.CHAIN_APPROX_SIMPLE)

# Draw every contour.
img_contours = img.copy()
cv2.drawContours(img_contours, contours, -1, (0, 255, 0), 3)

# Per-contour shape analysis.
img_analysis = img.copy()

for i, contour in enumerate(contours):
    # Basic measurements.
    area = cv2.contourArea(contour)
    perimeter = cv2.arcLength(contour, True)

    # Polygonal approximation (tolerance = 2% of the perimeter).
    epsilon = 0.02 * perimeter
    approx = cv2.approxPolyDP(contour, epsilon, True)

    # Convex hull.
    hull = cv2.convexHull(contour)

    # Axis-aligned bounding rectangle.
    x, y, w, h = cv2.boundingRect(contour)

    # Minimum-area (rotated) bounding rectangle.
    rect = cv2.minAreaRect(contour)
    box = cv2.boxPoints(rect)
    # BUG FIX: np.int0 was removed in NumPy 2.0; a plain int cast is
    # equivalent.
    box = box.astype(int)

    # Minimum enclosing circle.
    (x_circle, y_circle), radius = cv2.minEnclosingCircle(contour)
    center_circle = (int(x_circle), int(y_circle))
    radius = int(radius)

    # Ellipse fit (needs at least 5 points; result kept for reference
    # but not drawn below).
    if len(contour) >= 5:
        ellipse = cv2.fitEllipse(contour)

    # Shape descriptors.
    # 1. Circularity: 1.0 for a perfect circle.
    circularity = 4 * np.pi * area / (perimeter * perimeter) if perimeter > 0 else 0

    # 2. Aspect ratio of the bounding box.
    aspect_ratio = float(w) / h if h > 0 else 0

    # 3. Solidity: contour area over convex-hull area.
    hull_area = cv2.contourArea(hull)
    solidity = float(area) / hull_area if hull_area > 0 else 0

    # Annotate the image: label, then each fitted primitive in its own
    # colour (green box, red min-area rect, blue circle, cyan hull).
    cv2.putText(img_analysis, f'C{i}', (x, y-10), 
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
    cv2.rectangle(img_analysis, (x, y), (x+w, y+h), (0, 255, 0), 2)
    cv2.drawContours(img_analysis, [box], 0, (0, 0, 255), 2)
    cv2.circle(img_analysis, center_circle, radius, (255, 0, 0), 2)
    cv2.drawContours(img_analysis, [hull], 0, (255, 255, 0), 2)

    # Report the measurements.
    print(f"輪廓 {i}:")
    print(f"  面積: {area:.2f}")
    print(f"  周長: {perimeter:.2f}")
    print(f"  圓度: {circularity:.3f}")
    print(f"  伸長度: {aspect_ratio:.3f}")
    print(f"  實心度: {solidity:.3f}")
    print(f"  近似頂點數: {len(approx)}")
    print()

# Contour hierarchy analysis: colour each contour by nesting depth.
img_hierarchy = img.copy()

for i, (contour, hier) in enumerate(zip(contours, hierarchy[0])):
    # Walk the parent links (hier[3]) up to the root to get the depth.
    depth = 0
    current_hier = hier
    while current_hier[3] != -1:
        depth += 1
        current_hier = hierarchy[0][current_hier[3]]

    # One of three colours, cycling with the depth.
    color = (
        int(255 * (depth % 3 == 0)),
        int(255 * (depth % 3 == 1)),
        int(255 * (depth % 3 == 2))
    )

    cv2.drawContours(img_hierarchy, [contour], -1, color, 2)

    # Label at the centroid (from image moments); degenerate contours
    # with zero area are skipped to avoid division by zero.
    M = cv2.moments(contour)
    if M['m00'] != 0:
        cx = int(M['m10'] / M['m00'])
        cy = int(M['m01'] / M['m00'])
        cv2.putText(img_hierarchy, f'{i}({depth})', (cx-20, cy), 
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

# Display the analysis results.
plt.figure(figsize=(15, 10))

plt.subplot(2, 3, 1)
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.title('原始影像')
plt.axis('off')

plt.subplot(2, 3, 2)
plt.imshow(binary, cmap='gray')
plt.title('二值化影像')
plt.axis('off')

plt.subplot(2, 3, 3)
plt.imshow(cv2.cvtColor(img_contours, cv2.COLOR_BGR2RGB))
plt.title(f'檢測到{len(contours)}個輪廓')
plt.axis('off')

plt.subplot(2, 3, 4)
plt.imshow(cv2.cvtColor(img_analysis, cv2.COLOR_BGR2RGB))
plt.title('輪廓特徵分析')
plt.axis('off')

plt.subplot(2, 3, 5)
plt.imshow(cv2.cvtColor(img_hierarchy, cv2.COLOR_BGR2RGB))
plt.title('輪廓層次結構')
plt.axis('off')

plt.tight_layout()
plt.show()

第三部分:OpenCV高級應用與實戰

第7章 物件檢測與追蹤

7.1 傳統物件檢測方法

python

# Classical object-detection methods.
# 1. Template matching.
def template_matching_demo():
    """Run template matching with all six OpenCV comparison modes.

    Reads a grayscale scene and template from disk, locates the best
    match for each mode, and returns a list of
    (method-name, annotated BGR image) pairs.
    """
    img = cv2.imread('scene.jpg', cv2.IMREAD_GRAYSCALE)
    template = cv2.imread('template.jpg', cv2.IMREAD_GRAYSCALE)

    w, h = template.shape[::-1]

    # Comparison-mode names; resolved via getattr below instead of
    # eval() — same effect without the code-execution footgun.
    methods = ['cv2.TM_CCOEFF', 'cv2.TM_CCOEFF_NORMED', 
               'cv2.TM_CCORR', 'cv2.TM_CCORR_NORMED',
               'cv2.TM_SQDIFF', 'cv2.TM_SQDIFF_NORMED']

    results = []

    for meth in methods:
        method = getattr(cv2, meth.split('.', 1)[1])

        # Slide the template over the scene, producing a response map.
        res = cv2.matchTemplate(img, template, method)

        # For the squared-difference modes the BEST match is the
        # MINIMUM of the response; for every other mode, the maximum.
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
        if method in (cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED):
            top_left = min_loc
        else:
            top_left = max_loc

        bottom_right = (top_left[0] + w, top_left[1] + h)

        # Draw the detected location on a colour copy of the scene.
        img_display = cv2.cvtColor(img.copy(), cv2.COLOR_GRAY2BGR)
        cv2.rectangle(img_display, top_left, bottom_right, (0, 255, 0), 2)

        results.append((meth, img_display))

    return results

# 2. Feature-matching object detection.
def feature_matching_object_detection():
    """Locate 'object.jpg' inside 'scene.jpg' via ORB matching + homography.

    Returns the drawn match visualisation and the scene image with the
    detected object outlined (when a homography could be estimated).
    """
    img1 = cv2.imread('object.jpg', cv2.IMREAD_GRAYSCALE)  # object image
    img2 = cv2.imread('scene.jpg', cv2.IMREAD_GRAYSCALE)   # scene image
    
    # ORB detector (binary descriptors -> Hamming distance).
    orb = cv2.ORB_create(1000)
    
    # Keypoints and descriptors for both images.
    kp1, des1 = orb.detectAndCompute(img1, None)
    kp2, des2 = orb.detectAndCompute(img2, None)
    
    # Brute-force matcher with mutual cross-checking.
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    
    # Match descriptors.
    matches = bf.match(des1, des2)
    
    # Strongest matches (smallest distance) first.
    matches = sorted(matches, key=lambda x: x.distance)
    
    # Draw the top 50 matches.
    img_matches = cv2.drawMatches(img1, kp1, img2, kp2, 
                                  matches[:50], None, 
                                  flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
    
    # Localise the object via a homography (needs at least 4 matches).
    if len(matches) >= 4:
        src_pts = np.float32([kp1[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
        dst_pts = np.float32([kp2[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)
        
        # RANSAC homography with a 5.0-pixel reprojection tolerance.
        M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
        
        if M is not None:
            h, w = img1.shape
            pts = np.float32([[0, 0], [0, h-1], [w-1, h-1], [w-1, 0]]).reshape(-1, 1, 2)
            
            # Project the object's corners into the scene.
            dst = cv2.perspectiveTransform(pts, M)
            
            # Outline the detected object in the scene.
            img2_color = cv2.cvtColor(img2, cv2.COLOR_GRAY2BGR)
            img2_color = cv2.polylines(img2_color, [np.int32(dst)], 
                                       True, (0, 255, 0), 3, cv2.LINE_AA)
        else:
            img2_color = cv2.cvtColor(img2, cv2.COLOR_GRAY2BGR)
    else:
        img2_color = cv2.cvtColor(img2, cv2.COLOR_GRAY2BGR)
    
    return img_matches, img2_color

# Run both object-detection approaches.
template_results = template_matching_demo()
feature_matches, object_detected = feature_matching_object_detection()

# Display the results.
plt.figure(figsize=(20, 12))

# Template-matching panels (first three methods).
for i, (title, img_t) in enumerate(template_results[:3], 1):
    plt.subplot(3, 4, i)
    plt.imshow(cv2.cvtColor(img_t, cv2.COLOR_BGR2RGB))
    plt.title(title[4:])
    plt.axis('off')

# Template-matching panels (remaining three methods).
for i, (title, img_t) in enumerate(template_results[3:], 4):
    plt.subplot(3, 4, i)
    plt.imshow(cv2.cvtColor(img_t, cv2.COLOR_BGR2RGB))
    plt.title(title[4:])
    plt.axis('off')

# Feature-matching panels.
plt.subplot(3, 4, 9)
plt.imshow(feature_matches, cmap='gray')
plt.title('特徵匹配')
plt.axis('off')

plt.subplot(3, 4, 10)
plt.imshow(cv2.cvtColor(object_detected, cv2.COLOR_BGR2RGB))
plt.title('物件檢測結果')
plt.axis('off')

plt.tight_layout()
plt.show()

7.2 基於Haar Cascade的人臉檢測

python

# Haar-cascade face detection.
def haar_cascade_face_detection():
    """Detect faces, eyes and smiles in 'group_photo.jpg' with Haar cascades.

    Returns the fully annotated image plus a list of
    (parameter-description, annotated image) pairs comparing different
    detectMultiScale settings.
    """
    # Load the pre-trained cascade classifiers shipped with OpenCV.
    face_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    )
    eye_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + 'haarcascade_eye.xml'
    )
    smile_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + 'haarcascade_smile.xml'
    )
    
    # Load the image; cascades run on grayscale.
    img = cv2.imread('group_photo.jpg')
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    
    # Detect faces.
    faces = face_cascade.detectMultiScale(
        gray,
        scaleFactor=1.1,
        minNeighbors=5,
        minSize=(30, 30),
        flags=cv2.CASCADE_SCALE_IMAGE
    )
    
    print(f"檢測到 {len(faces)} 張人臉")
    
    # Draw a rectangle around each face.
    img_faces = img.copy()
    for (x, y, w, h) in faces:
        cv2.rectangle(img_faces, (x, y), (x+w, y+h), (255, 0, 0), 2)
        
        # Search for eyes only inside this face region.
        roi_gray = gray[y:y+h, x:x+w]
        roi_color = img_faces[y:y+h, x:x+w]
        
        eyes = eye_cascade.detectMultiScale(roi_gray, 1.1, 3)
        for (ex, ey, ew, eh) in eyes:
            cv2.rectangle(roi_color, (ex, ey), (ex+ew, ey+eh), (0, 255, 0), 2)
        
        # Search for smiles inside the same face region.
        smiles = smile_cascade.detectMultiScale(roi_gray, 1.8, 20)
        for (sx, sy, sw, sh) in smiles:
            cv2.rectangle(roi_color, (sx, sy), (sx+sw, sy+sh), (0, 0, 255), 2)
    
    # Compare different detection parameter combinations.
    scale_factors = [1.1, 1.2, 1.3]
    min_neighbors_list = [3, 5, 7]
    
    comparison_results = []
    
    for scale in scale_factors:
        for neighbors in min_neighbors_list:
            faces_test = face_cascade.detectMultiScale(
                gray,
                scaleFactor=scale,
                minNeighbors=neighbors,
                minSize=(30, 30)
            )
            
            img_test = img.copy()
            for (x, y, w, h) in faces_test:
                cv2.rectangle(img_test, (x, y), (x+w, y+h), (0, 255, 0), 2)
            
            comparison_results.append((
                f"scale={scale}, neighbors={neighbors}\n檢測到 {len(faces_test)} 張人臉",
                img_test
            ))
    
    return img_faces, comparison_results

# Run the face detection.
img_faces, comparisons = haar_cascade_face_detection()

# Display the results.
# BUG FIX: the original drew 1 + 9 panels (one per scale/neighbors
# combination) on a 3x3 grid, so the tenth call, plt.subplot(3, 3, 10),
# raised ValueError. A 2x5 grid fits all ten panels.
plt.figure(figsize=(15, 12))

plt.subplot(2, 5, 1)
plt.imshow(cv2.cvtColor(img_faces, cv2.COLOR_BGR2RGB))
plt.title('完整人臉檢測(含眼睛和微笑)')
plt.axis('off')

for i, (title, img_c) in enumerate(comparisons, 2):
    plt.subplot(2, 5, i)
    plt.imshow(cv2.cvtColor(img_c, cv2.COLOR_BGR2RGB))
    plt.title(title, fontsize=9)
    plt.axis('off')

plt.tight_layout()
plt.show()

7.3 物件追蹤演算法

python

# Compare several OpenCV object-tracking algorithms side by side.
def object_tracking_comparison(video_path='test_video.mp4'):
    """Run seven legacy OpenCV trackers on one user-selected object.

    Opens *video_path*, lets the user draw an ROI on the first frame,
    initialises every tracker on that same box, then updates all of
    them for up to 100 frames, drawing each tracker's box in its own
    colour.  Sampled frames are shown with matplotlib and a per-tracker
    success-rate table is printed.

    Returns the performance-statistics dict, or None when the video
    cannot be read.  Requires opencv-contrib (the cv2.legacy module).
    """
    # Names of the trackers under comparison.
    tracker_types = ['BOOSTING', 'MIL', 'KCF', 'TLD', 
                     'MEDIANFLOW', 'MOSSE', 'CSRT']
    
    # Factory callables for each tracker type (opencv-contrib only).
    tracker_dict = {
        'BOOSTING': cv2.legacy.TrackerBoosting_create,
        'MIL': cv2.legacy.TrackerMIL_create,
        'KCF': cv2.legacy.TrackerKCF_create,
        'TLD': cv2.legacy.TrackerTLD_create,
        'MEDIANFLOW': cv2.legacy.TrackerMedianFlow_create,
        'MOSSE': cv2.legacy.TrackerMOSSE_create,
        'CSRT': cv2.legacy.TrackerCSRT_create
    }
    
    # Open the video source.
    cap = cv2.VideoCapture(video_path)
    
    # Grab the first frame; bail out if the source is unreadable.
    ret, frame = cap.read()
    if not ret:
        print("無法讀取影片")
        return
    
    # Let the user draw the object box on the first frame.
    bbox = cv2.selectROI("選擇追蹤物件", frame, False)
    cv2.destroyWindow("選擇追蹤物件")
    
    # One tracker instance per type, all initialised on the same box.
    trackers = {}
    tracker_results = {}
    
    for tracker_type in tracker_types:
        tracker = tracker_dict[tracker_type]()
        trackers[tracker_type] = tracker
        tracker.init(frame, bbox)
        tracker_results[tracker_type] = []
    
    # Per-tracker drawing colours (BGR); order lines up with
    # tracker_types because dicts preserve insertion order.
    frames = [frame.copy()]
    colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), 
              (255, 255, 0), (255, 0, 255), (0, 255, 255), (128, 128, 128)]
    
    frame_count = 0
    max_frames = 100  # cap on the number of processed frames
    
    while frame_count < max_frames:
        ret, frame = cap.read()
        if not ret:
            break
        
        frame_count += 1
        
        # Advance every tracker on the new frame.
        for (tracker_type, tracker), color in zip(trackers.items(), colors):
            success, bbox = tracker.update(frame)
            
            if success:
                # Tracking succeeded: draw the box (bbox is x, y, w, h).
                p1 = (int(bbox[0]), int(bbox[1]))
                p2 = (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3]))
                cv2.rectangle(frame, p1, p2, color, 2, 1)
                
                # Label the box with the tracker's name.
                cv2.putText(frame, tracker_type, (p1[0], p1[1]-10),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                
                # Record the outcome for the statistics below.
                tracker_results[tracker_type].append({
                    'frame': frame_count,
                    'bbox': bbox,
                    'success': True
                })
            else:
                # Tracker lost the target on this frame.
                # NOTE(review): cv2.putText cannot render CJK glyphs,
                # so this label will appear as '???' -- confirm intent.
                cv2.putText(frame, "追蹤失敗", (100, 80),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)
                
                tracker_results[tracker_type].append({
                    'frame': frame_count,
                    'success': False
                })
        
        # Keep every 10th frame for the matplotlib summary figure.
        if frame_count % 10 == 0:
            frames.append(frame.copy())
        
        # Progress report every 10 frames.
        if frame_count % 10 == 0:
            print(f"處理第 {frame_count} 幀...")
    
    cap.release()
    
    # Aggregate per-frame outcomes into success-rate statistics.
    performance_stats = {}
    for tracker_type in tracker_types:
        results = tracker_results[tracker_type]
        success_count = sum(1 for r in results if r['success'])
        success_rate = success_count / len(results) * 100 if results else 0
        
        performance_stats[tracker_type] = {
            'success_rate': success_rate,
            'total_frames': len(results),
            'success_frames': success_count
        }
    
    # Visualise up to six of the sampled frames.
    plt.figure(figsize=(15, 10))
    
    for i, frame in enumerate(frames[:min(6, len(frames))]):
        plt.subplot(2, 3, i+1)
        plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        plt.title(f'第 {i*10} 幀')
        plt.axis('off')
    
    plt.tight_layout()
    plt.show()
    
    # Print the per-tracker comparison table.
    print("\n追蹤器性能比較:")
    print("=" * 60)
    for tracker_type, stats in performance_stats.items():
        print(f"{tracker_type:12s} | 成功率: {stats['success_rate']:6.2f}% | "
              f"成功幀數: {stats['success_frames']:3d}/{stats['total_frames']:3d}")
    
    return performance_stats

# Run the tracking comparison (skipped when no video file is available).
try:
    # NOTE: requires a test video or a camera; uncomment to run.
    # performance_stats = object_tracking_comparison()
    pass
except Exception as e:
    print(f"追蹤演示跳過:{e}")

第8章 深度學習與OpenCV

8.1 使用OpenCV加載深度學習模型

python

# Object detection with a YOLOv3 model loaded through OpenCV's DNN module.
def deep_learning_object_detection():
    """Run YOLOv3 object detection on 'street_scene.jpg'.

    Loads the COCO class labels, the Darknet config/weights, performs a
    forward pass, filters detections by confidence, applies non-maximum
    suppression, and draws the surviving boxes on the image.

    Returns:
        (annotated_image, number_of_detections)

    Raises:
        FileNotFoundError: if the input image cannot be read.
    """
    # Load the COCO class labels (one name per line).
    with open('coco.names', 'r') as f:
        class_names = [line.strip() for line in f]
    
    # Model definition and pre-trained weights.
    model_config = 'yolov3.cfg'
    model_weights = 'yolov3.weights'
    
    net = cv2.dnn.readNetFromDarknet(model_config, model_weights)
    
    # Prefer CUDA when the build supports it; fall back to CPU.
    # NOTE(review): setPreferable* does not reliably raise when CUDA is
    # unavailable -- OpenCV may still fall back to CPU silently.
    try:
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
        net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
        print("使用CUDA加速")
    except Exception:
        print("使用CPU")
    
    img = cv2.imread('street_scene.jpg')
    # BUG FIX: imread returns None (not an exception) on failure; the
    # original crashed later with an opaque AttributeError.
    if img is None:
        raise FileNotFoundError("cannot load image: street_scene.jpg")
    height, width = img.shape[:2]
    
    # Normalise to [0,1], resize to the 416x416 network input, BGR->RGB.
    blob = cv2.dnn.blobFromImage(img, 1/255.0, (416, 416), 
                                 swapRB=True, crop=False)
    
    net.setInput(blob)
    
    # BUG FIX: getUnconnectedOutLayers() returns Nx1 arrays in older
    # OpenCV builds but a flat vector since 4.5.4; the original
    # `i[0]` indexing raises IndexError on newer builds.  np.ravel
    # handles both return shapes.
    layer_names = net.getLayerNames()
    output_layers = [layer_names[int(np.ravel(i)[0]) - 1]
                     for i in net.getUnconnectedOutLayers()]
    
    # Forward pass over the YOLO output layers.
    outputs = net.forward(output_layers)
    
    # Collect candidate boxes above the confidence threshold.
    boxes = []
    confidences = []
    class_ids = []
    
    for output in outputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            
            if confidence > 0.5:  # confidence threshold
                # YOLO emits centre/size normalised to the image.
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                
                # Convert to top-left corner coordinates.
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)
    
    # Non-maximum suppression removes overlapping duplicates.
    indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    
    # One random colour per class for drawing.
    colors = np.random.uniform(0, 255, size=(len(class_names), 3))
    
    if len(indices) > 0:
        # np.ravel tolerates both the old Nx1 and new flat index shapes.
        for i in np.ravel(indices):
            x, y, w, h = boxes[i]
            label = str(class_names[class_ids[i]])
            confidence = confidences[i]
            color = colors[class_ids[i]]
            
            # Bounding box.
            cv2.rectangle(img, (x, y), (x+w, y+h), color, 2)
            
            # Label with confidence above the box.
            text = f"{label}: {confidence:.2f}"
            cv2.putText(img, text, (x, y-5), 
                       cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
    
    return img, len(indices)

# Face detection (SSD) combined with a CNN emotion classifier.
def face_recognition_emotion():
    """Detect faces on 'people.jpg' and classify each face's emotion.

    Uses an SSD ResNet-10 Caffe model for face detection and a
    TensorFlow CNN for 7-class emotion classification, drawing a green
    box and the predicted emotion for every confident detection.

    Returns:
        The annotated BGR image.

    Raises:
        FileNotFoundError: if the input image cannot be read.
    """
    # SSD face detector (Caffe deploy definition + weights).
    face_prototxt = 'deploy.prototxt'
    face_model = 'res10_300x300_ssd_iter_140000.caffemodel'
    face_net = cv2.dnn.readNetFromCaffe(face_prototxt, face_model)
    
    # Emotion labels; order must match the classifier's training labels.
    emotion_labels = ['Angry', 'Disgust', 'Fear', 'Happy', 
                      'Neutral', 'Sad', 'Surprise']
    
    emotion_net = cv2.dnn.readNetFromTensorflow(
        'emotion_model.pb', 'emotion_model.pbtxt'
    )
    
    img = cv2.imread('people.jpg')
    # BUG FIX: imread returns None (not an exception) on failure; the
    # original crashed later with an opaque AttributeError.
    if img is None:
        raise FileNotFoundError("cannot load image: people.jpg")
    (h, w) = img.shape[:2]
    
    # 300x300 blob with the model's BGR channel means subtracted.
    blob = cv2.dnn.blobFromImage(cv2.resize(img, (300, 300)), 1.0,
                                 (300, 300), (104.0, 177.0, 123.0))
    
    face_net.setInput(blob)
    detections = face_net.forward()
    
    # detections shape (1, 1, N, 7): each row holds
    # [_, _, confidence, x1, y1, x2, y2] with normalised coordinates.
    for i in range(0, detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        
        if confidence > 0.5:  # confidence threshold
            # Scale the normalised box back to image coordinates.
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")
            
            # Clamp the box to the image bounds.
            startX = max(0, startX)
            startY = max(0, startY)
            endX = min(w, endX)
            endY = min(h, endY)
            
            # Crop the face; skip degenerate boxes.
            face_roi = img[startY:endY, startX:endX]
            
            if face_roi.size == 0:
                continue
            
            # Classify the emotion on a 64x64 RGB crop.
            face_blob = cv2.dnn.blobFromImage(face_roi, 1.0, (64, 64),
                                             (0, 0, 0), swapRB=True, crop=False)
            emotion_net.setInput(face_blob)
            preds = emotion_net.forward()
            
            emotion_label = emotion_labels[np.argmax(preds)]
            emotion_confidence = np.max(preds)
            
            # Draw the face box.
            cv2.rectangle(img, (startX, startY), (endX, endY),
                         (0, 255, 0), 2)
            
            # Keep the label inside the frame for faces near the top edge.
            text = f"{emotion_label}: {emotion_confidence:.2f}"
            y = startY - 10 if startY - 10 > 10 else startY + 10
            cv2.putText(img, text, (startX, y),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 255, 0), 2)
    
    return img

# Run the deep-learning examples (both need pre-trained model files).
try:
    # YOLO object detection
    detected_img, num_objects = deep_learning_object_detection()
    
    # SSD face detection + emotion classification
    emotion_img = face_recognition_emotion()
    
    # Show both results side by side.
    plt.figure(figsize=(15, 7))
    
    plt.subplot(1, 2, 1)
    plt.imshow(cv2.cvtColor(detected_img, cv2.COLOR_BGR2RGB))
    plt.title(f'YOLO物件檢測 (檢測到 {num_objects} 個物件)')
    plt.axis('off')
    
    plt.subplot(1, 2, 2)
    plt.imshow(cv2.cvtColor(emotion_img, cv2.COLOR_BGR2RGB))
    plt.title('人臉檢測與情感分析')
    plt.axis('off')
    
    plt.tight_layout()
    plt.show()
    
except Exception as e:
    print(f"深度學習範例跳過:{e}")
    print("注意:需要下載預訓練模型文件才能運行此示例")

8.2 實時影像處理與分析

python

# Interactive real-time video processing with switchable filter modes.
class RealTimeCVProcessor:
    """Capture frames from a camera and apply a user-selectable filter.

    Number keys 1-7 switch between the registered filter modes and 'q'
    quits; everything runs inside the blocking `run()` loop.
    """

    def __init__(self, camera_id=0):
        self.cap = cv2.VideoCapture(camera_id)
        self.running = False
        self.mode = 'original'  # currently active filter name
        # Registry mapping filter name -> bound handler method.
        self.filters = {
            'original': self.apply_original,
            'gray': self.apply_gray,
            'edges': self.apply_edges,
            'blur': self.apply_blur,
            'face_detect': self.apply_face_detect,
            'motion_detect': self.apply_motion_detect,
            'color_track': self.apply_color_track
        }
        
    def apply_original(self, frame):
        """Pass the frame through untouched."""
        return frame
    
    def apply_gray(self, frame):
        """Convert to single-channel grayscale."""
        return cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    
    def apply_edges(self, frame):
        """Canny edge map, converted back to BGR for display."""
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        edges = cv2.Canny(gray, 100, 200)
        return cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)
    
    def apply_blur(self, frame):
        """Heavy Gaussian blur (15x15 kernel)."""
        return cv2.GaussianBlur(frame, (15, 15), 0)
    
    def apply_face_detect(self, frame):
        """Haar-cascade face detection; draws blue boxes around faces."""
        # PERF FIX: load the cascade once and cache it on the instance;
        # the original re-read the XML file on every single frame.
        if not hasattr(self, 'face_cascade'):
            self.face_cascade = cv2.CascadeClassifier(
                cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
            )
        
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = self.face_cascade.detectMultiScale(gray, 1.1, 4)
        
        for (x, y, w, h) in faces:
            cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
        
        return frame
    
    def apply_motion_detect(self, frame):
        """Frame-difference motion detection; boxes moving regions."""
        # First call: remember the frame and return it unchanged.
        if not hasattr(self, 'prev_gray'):
            self.prev_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            return frame
        
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        frame_diff = cv2.absdiff(self.prev_gray, gray)
        _, motion_mask = cv2.threshold(frame_diff, 25, 255, cv2.THRESH_BINARY)
        
        # Outline regions that changed between consecutive frames.
        contours, _ = cv2.findContours(motion_mask, cv2.RETR_EXTERNAL, 
                                       cv2.CHAIN_APPROX_SIMPLE)
        
        for contour in contours:
            if cv2.contourArea(contour) > 500:  # ignore tiny regions
                x, y, w, h = cv2.boundingRect(contour)
                cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
        
        self.prev_gray = gray
        return frame
    
    def apply_color_track(self, frame):
        """Track red objects via HSV thresholding."""
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        
        # Red wraps around the hue axis, so two ranges are needed.
        lower_red1 = np.array([0, 100, 100])
        upper_red1 = np.array([10, 255, 255])
        lower_red2 = np.array([160, 100, 100])
        upper_red2 = np.array([180, 255, 255])
        
        mask1 = cv2.inRange(hsv, lower_red1, upper_red1)
        mask2 = cv2.inRange(hsv, lower_red2, upper_red2)
        mask = cv2.bitwise_or(mask1, mask2)
        
        # Morphological open/close removes speckle and fills holes.
        kernel = np.ones((5, 5), np.uint8)
        mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
        mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
        
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, 
                                       cv2.CHAIN_APPROX_SIMPLE)
        
        for contour in contours:
            if cv2.contourArea(contour) > 500:
                x, y, w, h = cv2.boundingRect(contour)
                cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 0, 255), 2)
                cv2.putText(frame, 'Red Object', (x, y-10),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
        
        return frame
    
    def set_mode(self, mode):
        """Switch the active filter; unknown names are ignored."""
        if mode in self.filters:
            self.mode = mode
    
    def run(self):
        """Main capture/display loop; blocks until 'q' or stream end."""
        self.running = True
        print("實時影像處理啟動")
        print("按鍵說明:")
        print("  '1' - 原始影像")
        print("  '2' - 灰度影像")
        print("  '3' - 邊緣檢測")
        print("  '4' - 模糊效果")
        print("  '5' - 人臉檢測")
        print("  '6' - 運動檢測")
        print("  '7' - 顏色追蹤")
        print("  'q' - 退出")
        
        # Dispatch table replaces the original if/elif key chain.
        key_to_mode = {
            ord('1'): 'original',
            ord('2'): 'gray',
            ord('3'): 'edges',
            ord('4'): 'blur',
            ord('5'): 'face_detect',
            ord('6'): 'motion_detect',
            ord('7'): 'color_track',
        }
        
        while self.running:
            ret, frame = self.cap.read()
            if not ret:
                break
            
            # Apply the active filter to a copy of the frame.
            processed = self.filters[self.mode](frame.copy())
            
            # Overlay the current mode name.
            cv2.putText(processed, f"Mode: {self.mode}", (10, 30),
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            
            cv2.imshow('Real-Time CV Processing', processed)
            
            # Key handling.
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
            if key in key_to_mode:
                self.set_mode(key_to_mode[key])
        
        self.cleanup()
    
    def cleanup(self):
        """Release the camera and close all OpenCV windows."""
        self.running = False
        self.cap.release()
        cv2.destroyAllWindows()
        print("實時影像處理結束")

# Launch the real-time processor.
# NOTE: this requires a connected camera.
try:
    processor = RealTimeCVProcessor()
    processor.run()
except Exception as e:
    print(f"實時處理演示跳過:{e}")

第9章 專案實戰:完整的電腦視覺應用

9.1 智慧停車場管理系統

python

# Smart parking-lot monitoring: user-defined spots, edge-density occupancy.
class SmartParkingSystem:
    """Monitor manually-defined parking spots in a video stream.

    Spots are drawn by the user on the first frame; occupancy is then
    estimated per frame from the Canny edge density inside each spot.
    """

    def __init__(self, video_source=0):
        self.cap = cv2.VideoCapture(video_source)
        self.parking_spots = []  # list of ((x1, y1), (x2, y2)) corner pairs
        self.spot_status = {}    # spot index -> occupied flag
        self.spot_counter = 0    # kept for interface compatibility (unused)
        
    def define_parking_spots(self, frame):
        """Let the user define parking spots with click-and-drag.

        's' saves the spots, 'r' resets, 'q' aborts.  Returns the list
        of spot corner pairs.
        """
        print("點擊並拖曳定義停車位,按's'保存,'r'重置,'q'退出")
        
        spots = []
        current_spot = []
        
        def mouse_callback(event, x, y, flags, param):
            if event == cv2.EVENT_LBUTTONDOWN:
                current_spot.append((x, y))
            elif event == cv2.EVENT_LBUTTONUP:
                # BUG FIX: also record the release corner.  The original
                # only ever stored the press corner, so a click-and-drag
                # never produced two points and no spot was ever saved.
                current_spot.append((x, y))
                if len(current_spot) == 2:
                    spots.append(tuple(current_spot))
                current_spot.clear()
        
        clone = frame.copy()
        cv2.namedWindow("Define Parking Spots")
        cv2.setMouseCallback("Define Parking Spots", mouse_callback)
        
        while True:
            display = clone.copy()
            
            # Draw every completed spot with its index.
            for i, (pt1, pt2) in enumerate(spots):
                cv2.rectangle(display, pt1, pt2, (0, 255, 0), 2)
                cv2.putText(display, f"Spot {i+1}", 
                           (pt1[0], pt1[1]-10),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            
            # Mark the anchor point of a drag in progress (the original
            # drew a zero-size rectangle here, which was invisible).
            if len(current_spot) == 1:
                cv2.circle(display, current_spot[0], 4, (255, 0, 0), -1)
            
            cv2.imshow("Define Parking Spots", display)
            key = cv2.waitKey(1) & 0xFF
            
            if key == ord('s'):
                self.parking_spots = spots
                print(f"保存了 {len(spots)} 個停車位")
                break
            elif key == ord('r'):
                spots = []
                current_spot.clear()
                clone = frame.copy()
                print("重置所有停車位")
            elif key == ord('q'):
                break
        
        cv2.destroyWindow("Define Parking Spots")
        return spots
    
    def check_parking_spot(self, frame, spot):
        """Return True when the spot appears occupied.

        Uses Canny edge density inside the spot: an empty spot is mostly
        uniform pavement (few edges) while a car adds many edges.
        """
        (x1, y1), (x2, y2) = spot
        
        # Normalise the corners so spots dragged in any direction work.
        x1, x2 = sorted((x1, x2))
        y1, y2 = sorted((y1, y2))
        
        roi = frame[y1:y2, x1:x2]
        
        if roi.size == 0:
            return False
        
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        edges = cv2.Canny(blurred, 50, 150)
        
        # Fraction of edge pixels within the spot.
        edge_ratio = np.sum(edges > 0) / edges.size
        
        return edge_ratio > 0.05  # empirical threshold; tune per lot
    
    def process_video(self):
        """Main loop: define spots on the first frame, then monitor."""
        print("智慧停車場系統啟動中...")
        
        # Hoisted out of the frame loop (was re-imported every frame).
        from datetime import datetime
        
        ret, frame = self.cap.read()
        if not ret:
            print("無法讀取影片")
            return
        
        # Interactive spot definition on the first frame.
        self.parking_spots = self.define_parking_spots(frame)
        
        if not self.parking_spots:
            print("未定義停車位,系統退出")
            return
        
        # Every spot starts as free.
        self.spot_status = {i: False for i in range(len(self.parking_spots))}
        
        print(f"開始監控 {len(self.parking_spots)} 個停車位...")
        print("按'q'退出監控")
        
        while True:
            ret, frame = self.cap.read()
            if not ret:
                break
            
            display = frame.copy()
            
            available_spots = 0
            
            # Classify and draw every spot.
            for i, spot in enumerate(self.parking_spots):
                is_occupied = self.check_parking_spot(frame, spot)
                self.spot_status[i] = is_occupied
                
                (x1, y1), (x2, y2) = spot
                color = (0, 0, 255) if is_occupied else (0, 255, 0)
                thickness = 2
                
                cv2.rectangle(display, (x1, y1), (x2, y2), color, thickness)
                
                status = "Occupied" if is_occupied else "Available"
                cv2.putText(display, f"Spot {i+1}: {status}", 
                           (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 
                           0.5, color, 2)
                
                if not is_occupied:
                    available_spots += 1
            
            # Summary counter overlay.
            cv2.putText(display, f"Available: {available_spots}/{len(self.parking_spots)}", 
                       (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
            
            # Wall-clock timestamp in the bottom-left corner.
            current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            cv2.putText(display, current_time, 
                       (10, display.shape[0] - 10),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
            
            cv2.imshow("Smart Parking System", display)
            
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        
        self.cleanup()
    
    def cleanup(self):
        """Release the capture device and close all windows."""
        self.cap.release()
        cv2.destroyAllWindows()
        print("智慧停車場系統已關閉")

# Launch the parking system.
# NOTE: requires a camera or a video file.
try:
    parking_system = SmartParkingSystem('parking_lot_video.mp4')
    parking_system.process_video()
except Exception as e:
    print(f"停車場系統演示跳過:{e}")

9.2 增強現實(AR)應用

python

# Marker-based augmented reality: ORB matching + homography + PnP.
class AugmentedRealityApp:
    """Overlay simple wireframe 3D objects on a printed AR marker.

    The marker is located in each camera frame via ORB feature matching
    and a RANSAC homography; the object pose is then recovered with
    solvePnP and the wireframe is projected onto the frame.
    """

    def __init__(self, marker_image='ar_marker.jpg'):
        self.marker_image = cv2.imread(marker_image, cv2.IMREAD_GRAYSCALE)
        # BUG FIX: fail fast on a missing marker; the original crashed
        # later inside ORB with an unrelated error.
        if self.marker_image is None:
            raise FileNotFoundError(f"cannot load marker image: {marker_image}")
        self.ar_objects = []  # wireframe models registered below
        self.cap = cv2.VideoCapture(0)
        
        # ORB detector (up to 1000 keypoints).
        self.orb = cv2.ORB_create(1000)
        
        # Pre-compute the marker's keypoints/descriptors once.
        self.marker_kp, self.marker_des = self.orb.detectAndCompute(
            self.marker_image, None
        )
        
        # Brute-force Hamming matcher for ORB's binary descriptors.
        self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
        
        # Register the wireframe models.
        self.load_ar_objects()
    
    def load_ar_objects(self):
        """Register the available wireframe models (cube, pyramid)."""
        # Each model: unit-scale 3D vertices, the edges connecting them,
        # and a BGR draw colour.
        self.ar_objects = [
            {
                'name': 'cube',
                'points': np.float32([
                    [0, 0, 0], [1, 0, 0], [1, 1, 0], [0, 1, 0],
                    [0, 0, -1], [1, 0, -1], [1, 1, -1], [0, 1, -1]
                ]),
                'edges': [(0,1), (1,2), (2,3), (3,0),
                         (4,5), (5,6), (6,7), (7,4),
                         (0,4), (1,5), (2,6), (3,7)],
                'color': (0, 255, 0)
            },
            {
                'name': 'pyramid',
                'points': np.float32([
                    [0, 0, 0], [1, 0, 0], [1, 1, 0], [0, 1, 0],
                    [0.5, 0.5, 1]
                ]),
                'edges': [(0,1), (1,2), (2,3), (3,0),
                         (0,4), (1,4), (2,4), (3,4)],
                'color': (255, 0, 0)
            }
        ]
    
    def detect_marker(self, frame):
        """Locate the marker in *frame*.

        Returns (homography, frame_keypoints, matches) on success,
        or (None, None, None) when the marker cannot be found.
        """
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        
        kp, des = self.orb.detectAndCompute(gray, None)
        
        # Guard against featureless frames AND a featureless marker
        # (marker_des is None when the marker image has no keypoints).
        if des is None or self.marker_des is None or len(kp) < 4:
            return None, None, None
        
        # Match and rank by Hamming distance (best first).
        matches = self.bf.match(self.marker_des, des)
        matches = sorted(matches, key=lambda x: x.distance)
        
        if len(matches) < 10:  # require a minimum match count
            return None, None, None
        
        # Corresponding point pairs: marker -> frame.
        src_pts = np.float32([self.marker_kp[m.queryIdx].pt 
                              for m in matches]).reshape(-1, 1, 2)
        dst_pts = np.float32([kp[m.trainIdx].pt 
                              for m in matches]).reshape(-1, 1, 2)
        
        # Robust homography with RANSAC (5 px reprojection tolerance).
        M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
        
        if M is None:
            return None, None, None
        
        return M, kp, matches
    
    def draw_ar_object(self, frame, homography, ar_object):
        """Project *ar_object*'s wireframe onto *frame* and draw it."""
        # Marker corners in marker-image coordinates.
        h, w = self.marker_image.shape
        marker_corners = np.float32([
            [0, 0], [w, 0], [w, h], [0, h]
        ]).reshape(-1, 1, 2)
        
        # Where the marker corners land in the camera frame.
        dst_corners = cv2.perspectiveTransform(marker_corners, homography)
        
        # Approximate pinhole camera matrix.
        # NOTE(review): a real application needs camera calibration;
        # focal length == image width is only a rough guess.
        focal_length = frame.shape[1]
        center = (frame.shape[1]/2, frame.shape[0]/2)
        camera_matrix = np.array([
            [focal_length, 0, center[0]],
            [0, focal_length, center[1]],
            [0, 0, 1]
        ], dtype=np.float32)
        
        # Assume no lens distortion.
        dist_coeffs = np.zeros((4, 1))
        
        # The marker's 3D corners, assuming it lies in the Z=0 plane.
        marker_3d_points = np.float32([
            [0, 0, 0], [w, 0, 0], [w, h, 0], [0, h, 0]
        ])
        
        # Recover the marker pose (rotation + translation).
        success, rvec, tvec = cv2.solvePnP(
            marker_3d_points, dst_corners,
            camera_matrix, dist_coeffs
        )
        
        if not success:
            return frame
        
        # Project the model's 3D vertices into the image.
        ar_points_3d = ar_object['points']
        
        ar_points_2d, _ = cv2.projectPoints(
            ar_points_3d, rvec, tvec,
            camera_matrix, dist_coeffs
        )
        
        ar_points_2d = np.int32(ar_points_2d).reshape(-1, 2)
        
        # Wireframe edges.
        for edge in ar_object['edges']:
            pt1 = tuple(ar_points_2d[edge[0]])
            pt2 = tuple(ar_points_2d[edge[1]])
            cv2.line(frame, pt1, pt2, ar_object['color'], 2)
        
        # Vertices as small red dots.
        for point in ar_points_2d:
            cv2.circle(frame, tuple(point), 3, (0, 0, 255), -1)
        
        return frame
    
    def run(self):
        """Main AR loop; blocks until 'q' is pressed."""
        print("增強現實應用啟動")
        print("按'q'退出")
        print("按'1', '2'切換AR物件")
        
        current_object_idx = 0
        
        while True:
            ret, frame = self.cap.read()
            if not ret:
                break
            
            homography, kp, matches = self.detect_marker(frame)
            
            if homography is not None:
                # BUG FIX: draw the AR object and label on the camera
                # frame FIRST.  The original composed drawMatches (a
                # side-by-side marker+frame image) before projecting, so
                # every projected point landed shifted right by the
                # marker image width.
                frame = self.draw_ar_object(
                    frame, homography, 
                    self.ar_objects[current_object_idx]
                )
                
                cv2.putText(frame, 
                           f"AR Object: {self.ar_objects[current_object_idx]['name']}",
                           (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                           1, (255, 255, 255), 2)
                
                # Then build the side-by-side match visualisation.
                frame = cv2.drawMatches(
                    self.marker_image, self.marker_kp,
                    frame, kp, matches[:20], None,
                    flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS
                )
            else:
                cv2.putText(frame, "Marker not detected", 
                           (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                           1, (0, 0, 255), 2)
            
            cv2.imshow('Augmented Reality', frame)
            
            # Key handling.
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
            elif key == ord('1'):
                current_object_idx = 0
            elif key == ord('2'):
                current_object_idx = 1
        
        self.cleanup()
    
    def cleanup(self):
        """Release the camera and close all OpenCV windows."""
        self.cap.release()
        cv2.destroyAllWindows()
        print("增強現實應用已關閉")

# Launch the AR demo.
# NOTE: requires a camera and the AR marker image on disk.
try:
    ar_app = AugmentedRealityApp()
    ar_app.run()
except Exception as e:
    print(f"AR應用演示跳過:{e}")

第四部分:性能優化與最佳實踐

第10章 OpenCV性能優化技巧

10.1 性能優化策略

python

import time
import numpy as np

class OpenCVOptimizer:
    """Benchmark helpers demonstrating OpenCV performance techniques."""

    def __init__(self):
        self.results = {}  # reserved for callers to stash benchmark output
    
    def benchmark(self, func, *args, **kwargs):
        """Run func(*args, **kwargs) and return (result, elapsed_seconds).

        Uses time.perf_counter(), which is monotonic and higher
        resolution than time.time(), so short runs measure reliably.
        """
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        end_time = time.perf_counter()
        
        return result, end_time - start_time
    
    def optimize_loop_operations(self, image):
        """Compare per-pixel Python loops vs NumPy vs OpenCV built-ins."""
        height, width = image.shape[:2]
        
        # Method 1: nested Python loops (slowest by far).
        def slow_method(img):
            result = img.copy()
            for y in range(height):
                for x in range(width):
                    for c in range(3):
                        result[y, x, c] = min(255, img[y, x, c] * 1.5)
            return result
        
        # Method 2: NumPy vectorised arithmetic.
        def fast_method(img):
            result = np.clip(img.astype(np.float32) * 1.5, 0, 255)
            return result.astype(np.uint8)
        
        # Method 3: OpenCV's fused scale-and-saturate built-in.
        def opencv_method(img):
            return cv2.convertScaleAbs(img, alpha=1.5, beta=0)
        
        print("循環操作優化比較:")
        print("-" * 50)
        
        _, time_slow = self.benchmark(slow_method, image)
        _, time_fast = self.benchmark(fast_method, image)
        _, time_opencv = self.benchmark(opencv_method, image)
        
        print(f"Python循環: {time_slow:.4f} 秒")
        print(f"NumPy向量化: {time_fast:.4f} 秒 (加速 {time_slow/time_fast:.1f}x)")
        print(f"OpenCV內置: {time_opencv:.4f} 秒 (加速 {time_slow/time_opencv:.1f}x)")
        
        return {
            'slow': time_slow,
            'fast': time_fast,
            'opencv': time_opencv
        }
    
    def optimize_image_operations(self, image):
        """Benchmark the five cv2.resize interpolation modes."""
        def method_nearest(img):
            return cv2.resize(img, (800, 600), interpolation=cv2.INTER_NEAREST)
        
        def method_linear(img):
            return cv2.resize(img, (800, 600), interpolation=cv2.INTER_LINEAR)
        
        def method_cubic(img):
            return cv2.resize(img, (800, 600), interpolation=cv2.INTER_CUBIC)
        
        def method_area(img):
            return cv2.resize(img, (800, 600), interpolation=cv2.INTER_AREA)
        
        def method_lanczos(img):
            return cv2.resize(img, (800, 600), interpolation=cv2.INTER_LANCZOS4)
        
        methods = [
            ('NEAREST', method_nearest),
            ('LINEAR', method_linear),
            ('CUBIC', method_cubic),
            ('AREA', method_area),
            ('LANCZOS', method_lanczos)
        ]
        
        print("\n影像縮放插值方法比較:")
        print("-" * 50)
        
        times = {}
        for name, method in methods:
            _, exec_time = self.benchmark(method, image)
            times[name] = exec_time
            print(f"{name:10s}: {exec_time:.6f} 秒")
        
        return times
    
    def optimize_memory_usage(self, image):
        """Compare the memory footprint of common image representations."""
        print("\n記憶體使用優化:")
        print("-" * 50)
        
        import sys
        
        # Baseline: the BGR uint8 image as-is.
        # NOTE(review): sys.getsizeof is shallow for general objects but
        # ndarray's __sizeof__ does include the owned data buffer.
        mem_original = sys.getsizeof(image)
        
        # float32 quadruples the per-pixel storage.
        image_float32 = image.astype(np.float32)
        mem_float32 = sys.getsizeof(image_float32)
        
        # Grayscale drops two of the three channels.
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        mem_gray = sys.getsizeof(gray)
        
        # In-memory JPEG compression (quality 90).
        encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 90]
        _, buffer = cv2.imencode('.jpg', image, encode_param)
        mem_compressed = sys.getsizeof(buffer)
        
        print(f"原始BGR影像: {mem_original / 1024:.1f} KB")
        print(f"float32影像: {mem_float32 / 1024:.1f} KB")
        print(f"灰度影像: {mem_gray / 1024:.1f} KB (減少 {((mem_original-mem_gray)/mem_original*100):.1f}%)")
        print(f"JPEG壓縮: {mem_compressed / 1024:.1f} KB (減少 {((mem_original-mem_compressed)/mem_original*100):.1f}%)")
        
        return {
            'original': mem_original,
            'float32': mem_float32,
            'gray': mem_gray,
            'compressed': mem_compressed
        }
    
    @staticmethod
    def _process_single(image):
        """Grayscale -> blur -> Canny; the unit of work for the pool.

        BUG FIX: defined at class level instead of nested inside the
        benchmark method -- multiprocessing must pickle the callable by
        reference, and a nested function cannot be pickled, so the
        original pool.map raised a pickling error.
        """
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        edges = cv2.Canny(blurred, 50, 150)
        return edges
    
    def parallel_processing_optimization(self, images):
        """Compare sequential vs multiprocessing-pool image processing."""
        print("\n平行處理優化:")
        print("-" * 50)
        
        process_single = self._process_single
        
        # Sequential baseline.
        def sequential_processing(imgs):
            return [process_single(img) for img in imgs]
        
        import multiprocessing as mp
        
        # Fan the work out over one worker per CPU core.
        def parallel_processing(imgs):
            with mp.Pool(processes=mp.cpu_count()) as pool:
                results = pool.map(process_single, imgs)
            return results
        
        _, time_seq = self.benchmark(sequential_processing, images)
        _, time_par = self.benchmark(parallel_processing, images)
        
        print(f"順序處理: {time_seq:.4f} 秒")
        print(f"平行處理: {time_par:.4f} 秒")
        print(f"加速比: {time_seq/time_par:.2f}x")
        print(f"CPU核心數: {mp.cpu_count()}")
        
        return {
            'sequential': time_seq,
            'parallel': time_par,
            'speedup': time_seq/time_par,
            'cpu_cores': mp.cpu_count()
        }
    
    def gpu_acceleration(self, image):
        """Probe for CUDA devices and benchmark GPU vs CPU filtering."""
        print("\nGPU加速檢查:")
        print("-" * 50)
        
        gpu_info = {}
        
        try:
            # How many CUDA devices does this OpenCV build see?
            count = cv2.cuda.getCudaEnabledDeviceCount()
            gpu_info['cuda_devices'] = count
            
            if count > 0:
                print(f"檢測到 {count} 個CUDA設備")
                
                for i in range(count):
                    # BUG FIX: printCudaDeviceInfo writes directly to
                    # stdout and returns None, so the original
                    # print(f"... {info}") just printed "None".
                    cv2.cuda.printCudaDeviceInfo(i)
                
                # Upload the image to device memory.
                gpu_img = cv2.cuda_GpuMat()
                gpu_img.upload(image)
                
                # Time the GPU pipeline: gray -> blur -> Sobel x/y.
                start = time.perf_counter()
                
                gpu_gray = cv2.cuda.cvtColor(gpu_img, cv2.COLOR_BGR2GRAY)
                
                gpu_blur = cv2.cuda.createGaussianFilter(
                    cv2.CV_8UC1, cv2.CV_8UC1, (5, 5), 0
                ).apply(gpu_gray)
                
                sobel_x = cv2.cuda.createSobelFilter(
                    cv2.CV_8UC1, cv2.CV_8UC1, 1, 0
                ).apply(gpu_blur)
                
                sobel_y = cv2.cuda.createSobelFilter(
                    cv2.CV_8UC1, cv2.CV_8UC1, 0, 1
                ).apply(gpu_blur)
                
                # Copy the results back to host memory.
                result_x = sobel_x.download()
                result_y = sobel_y.download()
                
                gpu_time = time.perf_counter() - start
                
                # Same pipeline on the CPU for comparison.
                start = time.perf_counter()
                gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                blur = cv2.GaussianBlur(gray, (5, 5), 0)
                sobelx = cv2.Sobel(blur, cv2.CV_8UC1, 1, 0, ksize=3)
                sobely = cv2.Sobel(blur, cv2.CV_8UC1, 0, 1, ksize=3)
                cpu_time = time.perf_counter() - start
                
                print(f"CPU處理時間: {cpu_time:.6f} 秒")
                print(f"GPU處理時間: {gpu_time:.6f} 秒")
                print(f"GPU加速比: {cpu_time/gpu_time:.2f}x")
                
                gpu_info['cpu_time'] = cpu_time
                gpu_info['gpu_time'] = gpu_time
                gpu_info['speedup'] = cpu_time/gpu_time
                
            else:
                print("未檢測到CUDA設備")
                
        except Exception as e:
            print(f"GPU加速檢查失敗: {e}")
        
        return gpu_info

# Drive every optimizer benchmark against a synthetic Full-HD image.
def run_optimization_tests():
    """Run the OpenCV optimisation benchmark suite and print advice.

    Returns a dict with the per-benchmark timing/statistics results.
    """
    # Random Full-HD BGR test image.
    test_image = np.random.randint(0, 256, (1080, 1920, 3), dtype=np.uint8)
    
    optimizer = OpenCVOptimizer()
    
    banner = "=" * 60
    print(banner)
    print("OpenCV性能優化測試")
    print(banner)
    
    # The pixel-loop benchmark runs on a small crop only -- the pure
    # Python variant would take far too long on the full image.
    loop_results = optimizer.optimize_loop_operations(test_image[:100, :100])
    
    resize_results = optimizer.optimize_image_operations(test_image)
    
    memory_results = optimizer.optimize_memory_usage(test_image)
    
    # Ten copies of the image feed the parallel-processing benchmark.
    parallel_results = optimizer.parallel_processing_optimization(
        [test_image.copy() for _ in range(10)]
    )
    
    gpu_results = optimizer.gpu_acceleration(test_image)
    
    print("\n" + banner)
    print("性能優化建議總結:")
    print(banner)
    for advice in (
        "1. 避免使用Python循環處理像素,使用NumPy向量化操作",
        "2. 選擇適當的插值方法:",
        "   - INTER_NEAREST: 最快,質量最低",
        "   - INTER_LINEAR: 平衡速度和質量",
        "   - INTER_CUBIC: 高質量,較慢",
        "   - INTER_AREA: 縮小影像時最佳",
        "3. 減少記憶體使用:",
        "   - 使用灰度圖替代彩色圖",
        "   - 使用適當的資料類型",
        "   - 考慮影像壓縮",
        "4. 利用平行處理處理多個影像",
        "5. 如有GPU,啟用CUDA加速",
    ):
        print(advice)
    
    return {
        'loop': loop_results,
        'resize': resize_results,
        'memory': memory_results,
        'parallel': parallel_results,
        'gpu': gpu_results,
    }

# Run the optimization test suite at import time and keep the results.
optimization_results = run_optimization_tests()

第11章 最佳實踐與常見問題

11.1 最佳實踐指南

python

# Catalog of OpenCV best practices, grouped by topic.
class OpenCVBestPractices:
    """Static catalog of OpenCV best practices.

    Each category method returns a list of dicts with the keys
    'title', 'bad_practice', 'good_practice', and 'reason'. The
    embedded code snippets are stored as plain strings and are
    intended for display only — they are never executed.
    """

    @staticmethod
    def image_io_best_practices():
        """Return best practices for image reading and writing."""
        practices = [
            {
                'title': '檢查影像是否成功載入',
                'bad_practice': 'img = cv2.imread("image.jpg")',
                'good_practice': '''img = cv2.imread("image.jpg")
if img is None:
    print("無法載入影像")
    # 處理錯誤情況
else:
    # 繼續處理影像''',
                'reason': '直接使用可能為None的影像會導致後續操作失敗'
            },
            {
                'title': '指定影像讀取模式',
                'bad_practice': 'img = cv2.imread("image.jpg")',
                'good_practice': '''# 根據需求選擇讀取模式
img_color = cv2.imread("image.jpg", cv2.IMREAD_COLOR)  # 彩色影像
img_grayscale = cv2.imread("image.jpg", cv2.IMREAD_GRAYSCALE)  # 灰度影像
img_unchanged = cv2.imread("image.jpg", cv2.IMREAD_UNCHANGED)  # 包含alpha通道''',
                'reason': '明確指定模式可以避免意外行為並提高效率'
            },
            {
                'title': '高效儲存影像',
                'bad_practice': 'cv2.imwrite("output.jpg", img)',
                'good_practice': '''# 控制JPEG質量
cv2.imwrite("output.jpg", img, [cv2.IMWRITE_JPEG_QUALITY, 95])

# 控制PNG壓縮級別
cv2.imwrite("output.png", img, [cv2.IMWRITE_PNG_COMPRESSION, 9])''',
                'reason': '調整壓縮參數可以在質量和文件大小之間取得平衡'
            }
        ]
        
        return practices
    
    @staticmethod
    def memory_management_best_practices():
        """Return best practices for memory and resource management."""
        practices = [
            {
                'title': '及時釋放資源',
                'bad_practice': '# 忘記釋放攝影機資源',
                'good_practice': '''cap = cv2.VideoCapture(0)
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # 處理幀
finally:
    cap.release()
    cv2.destroyAllWindows()''',
                'reason': '確保資源被正確釋放,避免記憶體洩漏'
            },
            {
                'title': '使用適當的資料類型',
                'bad_practice': '# 對8-bit影像使用浮點運算',
                'good_practice': '''# 根據操作選擇資料類型
img_uint8 = image.astype(np.uint8)  # 8-bit無符號整數
img_float32 = image.astype(np.float32)  # 32-bit浮點數
img_float64 = image.astype(np.float64)  # 64-bit浮點數

# 運算後轉換回適當類型
result = np.clip(img_float32 * 1.5, 0, 255).astype(np.uint8)''',
                'reason': '適當的資料類型可以節省記憶體並提高運算精度'
            },
            {
                'title': '避免不必要的複製',
                'bad_practice': '# 頻繁複製大型影像',
                'good_practice': '''# 只有在必要時才複製
img_copy = image.copy()  # 深複製

# 使用視圖而不是複製
img_view = image[:100, :100]  # 創建視圖,不複製資料

# 就地操作(修改原始影像)
cv2.rectangle(image, (10, 10), (100, 100), (255, 0, 0), 2)''',
                'reason': '減少記憶體分配和複製操作可以提高性能'
            }
        ]
        
        return practices
    
    @staticmethod
    def performance_best_practices():
        """Return best practices for runtime performance."""
        practices = [
            {
                'title': '向量化操作',
                'bad_practice': '''for y in range(height):
    for x in range(width):
        image[y, x] = image[y, x] * 1.5''',
                'good_practice': '''# 使用NumPy向量化操作
image = np.clip(image.astype(np.float32) * 1.5, 0, 255).astype(np.uint8)

# 或使用OpenCV內置函數
image = cv2.convertScaleAbs(image, alpha=1.5, beta=0)''',
                'reason': '向量化操作比Python循環快數百倍'
            },
            {
                'title': '預分配記憶體',
                'bad_practice': '''results = []
for i in range(1000):
    result = process_image(image)
    results.append(result)''',
                'good_practice': '''# 預分配記憶體
results = np.empty((1000, height, width, 3), dtype=np.uint8)
for i in range(1000):
    results[i] = process_image(image)''',
                'reason': '預分配避免反覆重新分配記憶體,提高性能'
            },
            {
                'title': '使用適當的演算法參數',
                'bad_practice': '''# 使用過高的解析度或質量
resized = cv2.resize(image, (4000, 3000))''',
                'good_practice': '''# 根據應用需求選擇適當參數
# 人臉檢測不需要高解析度
resized = cv2.resize(image, (640, 480))

# 調整檢測器參數平衡速度和準確性
faces = face_cascade.detectMultiScale(
    gray,
    scaleFactor=1.1,  # 較小值更準確但更慢
    minNeighbors=5,   # 較大值減少誤報
    minSize=(30, 30)  # 最小人臉尺寸
)''',
                'reason': '適當的參數可以在保持質量的同時大幅提高性能'
            }
        ]
        
        return practices
    
    @staticmethod
    def code_quality_best_practices():
        """Return best practices for code quality and maintainability."""
        practices = [
            {
                'title': '錯誤處理',
                'bad_practice': '# 忽略潛在的錯誤',
                'good_practice': '''try:
    image = cv2.imread("image.jpg")
    if image is None:
        raise FileNotFoundError("無法載入影像")
    
    # 影像處理操作
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
except FileNotFoundError as e:
    print(f"文件錯誤: {e}")
    # 恢復或退出
    
except cv2.error as e:
    print(f"OpenCV錯誤: {e}")
    # 處理OpenCV特定錯誤
    
except Exception as e:
    print(f"未預期錯誤: {e}")
    # 通用錯誤處理''',
                'reason': '適當的錯誤處理提高代碼健壯性和可維護性'
            },
            {
                'title': '代碼模組化',
                'bad_practice': '# 所有功能寫在一個大函數中',
                'good_practice': '''def load_image(path, mode=cv2.IMREAD_COLOR):
    """載入影像並檢查是否成功"""
    image = cv2.imread(path, mode)
    if image is None:
        raise ValueError(f"無法載入影像: {path}")
    return image

def preprocess_image(image, target_size=(224, 224)):
    """預處理影像:調整大小、歸一化等"""
    resized = cv2.resize(image, target_size)
    normalized = resized / 255.0
    return normalized

def process_pipeline(image_path):
    """完整的處理流程"""
    image = load_image(image_path)
    processed = preprocess_image(image)
    return processed''',
                'reason': '模組化代碼更易於測試、維護和重用'
            },
            {
                'title': '文檔和註釋',
                'bad_practice': '# 缺乏文檔和註釋',
                'good_practice': '''def detect_objects(image, confidence_threshold=0.5):
    """
    使用YOLO模型檢測影像中的物件
    
    參數:
    image: 輸入影像 (BGR格式)
    confidence_threshold: 置信度閾值,預設0.5
    
    返回:
    results: 檢測結果列表,每個元素為(標籤, 置信度, 邊界框)
    """
    # 載入模型
    net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
    
    # 準備輸入blob
    blob = cv2.dnn.blobFromImage(image, 1/255.0, (416, 416), 
                                 swapRB=True, crop=False)
    
    # ... 其餘代碼 ...
    
    return results''',
                'reason': '良好的文檔和註釋提高代碼可讀性和可維護性'
            }
        ]
        
        return practices
    
    @staticmethod
    def display_all_best_practices():
        """Print every best-practice category and its entries to stdout."""
        categories = [
            ("影像I/O最佳實踐", OpenCVBestPractices.image_io_best_practices()),
            ("記憶體管理最佳實踐", OpenCVBestPractices.memory_management_best_practices()),
            ("性能最佳實踐", OpenCVBestPractices.performance_best_practices()),
            ("代碼質量最佳實踐", OpenCVBestPractices.code_quality_best_practices())
        ]
        
        # One banner per category, then numbered entries with
        # reason / bad-practice / good-practice snippets.
        for category_name, practices in categories:
            print(f"\n{'='*60}")
            print(category_name)
            print('='*60)
            
            for i, practice in enumerate(practices, 1):
                print(f"\n{i}. {practice['title']}")
                print(f"   原因: {practice['reason']}")
                print(f"   不良實踐:\n{practice['bad_practice']}")
                print(f"   最佳實踐:\n{practice['good_practice']}")

# Print the full best-practices guide at import time.
OpenCVBestPractices.display_all_best_practices()

11.2 常見問題與解決方案

python

# Frequently-asked OpenCV questions with causes and remedies.
class OpenCVFAQ:
    """Static FAQ catalog for common OpenCV problems.

    Each entry is a dict with 'question', 'cause', and 'solution'
    keys; solutions embed example code as display-only strings that
    are never executed.
    """

    @staticmethod
    def common_issues_and_solutions():
        """Return the list of common issues, their causes, and solutions."""
        faqs = [
            {
                'question': '影像載入返回None',
                'cause': '文件路徑錯誤、文件損壞或格式不受支援',
                'solution': '''1. 檢查文件路徑是否正確
2. 驗證文件是否存在且可讀
3. 確保影像格式受支援
4. 使用絕對路徑代替相對路徑

# 範例代碼:
import os

image_path = "image.jpg"
if not os.path.exists(image_path):
    print(f"文件不存在: {image_path}")
else:
    image = cv2.imread(image_path)
    if image is None:
        print(f"無法載入影像,可能是格式不受支援或文件損壞")
    else:
        print(f"成功載入影像,尺寸: {image.shape}")'''
            },
            {
                'question': '色彩顯示不正確',
                'cause': 'OpenCV使用BGR色彩空間,而其他庫通常使用RGB',
                'solution': '''# 將BGR轉換為RGB用於顯示
image_bgr = cv2.imread("image.jpg")  # OpenCV載入為BGR
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

# 使用Matplotlib顯示
import matplotlib.pyplot as plt
plt.imshow(image_rgb)
plt.show()

# 或將RGB轉換為BGR用於OpenCV操作
image_rgb = plt.imread("image.jpg")  # Matplotlib載入為RGB
image_bgr = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR)'''
            },
            {
                'question': '影片處理速度太慢',
                'cause': '每幀處理過於複雜、解析度過高或未使用優化',
                'solution': '''1. 降低影像解析度
2. 減少每幀的處理操作
3. 使用向量化操作代替循環
4. 啟用多線程或GPU加速

# 優化範例:
def optimized_video_processing():
    cap = cv2.VideoCapture(0)
    
    # 降低解析度
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
    
    # 跳過一些幀以提高速度
    frame_skip = 2
    frame_count = 0
    
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        
        frame_count += 1
        if frame_count % frame_skip != 0:
            continue  # 跳過此幀
        
        # 使用高效的處理方法
        # 例如,只在必要時進行色彩轉換
        if need_gray:
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        
        # 顯示結果
        cv2.imshow('Optimized Processing', frame)
        
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    
    cap.release()
    cv2.destroyAllWindows()'''
            },
            {
                'question': '記憶體使用過高',
                'cause': '未及時釋放資源、影像緩存過多或資料類型不當',
                'solution': '''1. 及時釋放攝影機和視窗資源
2. 使用適當的資料類型
3. 釋放不再需要的變數
4. 使用生成器處理大型影像集合

# 記憶體管理範例:
def memory_efficient_processing(image_paths):
    for path in image_paths:
        # 每次只載入一個影像
        image = cv2.imread(path)
        if image is None:
            continue
        
        # 處理影像
        processed = process_image(image)
        
        # 立即釋放原始影像
        del image
        
        yield processed
        
        # 強制垃圾回收(在必要時)
        import gc
        gc.collect()'''
            },
            {
                'question': '檢測演算法準確性低',
                'cause': '參數設置不當、影像質量差或光照條件不佳',
                'solution': '''1. 調整檢測器參數
2. 預處理影像(去噪、增強對比度等)
3. 使用適當的色彩空間
4. 考慮光照條件

# 改進檢測準確性:
def improve_detection_accuracy(image):
    # 1. 預處理影像
    # 去噪
    denoised = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
    
    # 增強對比度
    lab = cv2.cvtColor(denoised, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8))
    l = clahe.apply(l)
    enhanced = cv2.merge([l, a, b])
    enhanced = cv2.cvtColor(enhanced, cv2.COLOR_LAB2BGR)
    
    # 2. 調整檢測參數
    gray = cv2.cvtColor(enhanced, cv2.COLOR_BGR2GRAY)
    
    # 人臉檢測範例
    face_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    )
    
    # 嘗試不同的參數組合
    faces = face_cascade.detectMultiScale(
        gray,
        scaleFactor=1.05,  # 較小值提高準確性
        minNeighbors=6,    # 較大值減少誤報
        minSize=(50, 50),  # 根據應用調整
        flags=cv2.CASCADE_SCALE_IMAGE
    )
    
    return faces'''
            },
            {
                'question': '跨平台兼容性問題',
                'cause': '不同操作系統的差異、依賴庫版本問題或硬體差異',
                'solution': '''1. 使用虛擬環境管理依賴
2. 明確指定庫版本
3. 檢查操作系統特定問題
4. 使用條件代碼處理平台差異

# requirements.txt範例:
opencv-python==4.5.5.64
numpy==1.21.6
matplotlib==3.5.2

# 平台兼容性檢查:
import platform
import sys

def check_platform_compatibility():
    system = platform.system()
    print(f"操作系統: {system}")
    print(f"Python版本: {sys.version}")
    print(f"OpenCV版本: {cv2.__version__}")
    
    if system == "Windows":
        # Windows特定設置
        print("運行在Windows系統上")
    elif system == "Linux":
        # Linux特定設置
        print("運行在Linux系統上")
    elif system == "Darwin":
        # macOS特定設置
        print("運行在macOS系統上")
    
    # 檢查必要的功能
    if not cv2.ocl.haveOpenCL():
        print("警告: OpenCL不可用,某些加速功能可能受限")
    
    return system'''
            }
        ]
        
        return faqs
    
    @staticmethod
    def display_faq():
        """Print every FAQ entry (question, cause, solution) to stdout."""
        faqs = OpenCVFAQ.common_issues_and_solutions()
        
        print("\n" + "="*80)
        print("OpenCV常見問題與解決方案")
        print("="*80)
        
        # Numbered entries, each separated by a dashed rule.
        for i, faq in enumerate(faqs, 1):
            print(f"\n{i}. 問題: {faq['question']}")
            print(f"   原因: {faq['cause']}")
            print(f"   解決方案:")
            print(f"{faq['solution']}")
            print("-"*80)

# Print the FAQ at import time.
OpenCVFAQ.display_faq()

結論

OpenCV作為一個功能強大的電腦視覺庫,從基礎的影像處理到先進的深度學習應用,提供了完整的解決方案。通過本指南,我們從入門基礎開始,逐步深入到高級應用,涵蓋了:

  1. 基礎操作:影像讀取、顯示、色彩空間轉換、基本變換

  2. 進階技巧:濾波、形態學操作、特徵檢測、影像分割

  3. 高級應用:物件檢測、追蹤、深度學習整合、AR應用

  4. 性能優化:向量化操作、記憶體管理、平行處理、GPU加速

  5. 最佳實踐:代碼質量、錯誤處理、跨平台兼容性

未來發展方向

隨著電腦視覺技術的不斷發展,OpenCV也在持續進化。未來的發展方向可能包括:

  1. 深度學習集成:更好的深度學習模型支援和優化

  2. 邊緣計算:針對物聯網和移動設備的優化

  3. 3D視覺:增強的三維重建和點雲處理能力

  4. 實時性能:更高效的演算法和硬體加速

學習資源建議

  1. 官方文檔:OpenCV官方文檔是最權威的學習資源

  2. GitHub倉庫:查看官方示例和社區貢獻

  3. 學術論文:了解演算法原理和最新研究

  4. 實踐專案:通過實際專案加深理解

  5. 社區論壇:Stack Overflow、OpenCV論壇等

電腦視覺是一個快速發展的領域,持續學習和實踐是掌握這項技術的關鍵。無論是學術研究還是工業應用,OpenCV都將是您強大的工具。祝您在電腦視覺的旅程中取得成功!

Logo

腾讯云面向开发者汇聚海量精品云计算使用和开发经验,营造开放的云计算技术生态圈。

更多推荐