识别纸币面额,语音播报金额,帮助视障用户快速分辨不同面值的人民币。
虽然100元和50元可以通过盲文识别,但1元、5元、10元、20元纸币没有明显触觉差异。
视障人士纸币识别系统
一、实际应用场景与痛点
应用场景
视障用户王阿姨经营着一家小卖部。每天她需要接收现金支付,但无法通过触摸准确分辨纸币面额。虽然有100元和50元可以通过盲文识别,但1元、5元、10元、20元纸币没有明显触觉差异。她经常需要请别人帮忙辨认,既麻烦又不保护隐私。一次,她不慎将100元当作10元找零,造成了不小的损失。
核心痛点
1. 触觉区分困难:小面额纸币缺乏明显触觉特征
2. 依赖他人:需要旁人帮助识别,缺乏独立性
3. 隐私泄露:让他人知道财务状况
4. 效率低下:每次交易都需要反复确认
5. 错误风险:容易收错钱、找错钱
6. 新版纸币:2019版人民币设计变化,触感更不明显
二、核心逻辑设计
1. 启动摄像头,获取纸币图像
2. 预处理图像(去噪、增强、透视校正)
3. 检测纸币边界和位置
4. 提取关键特征区域
5. 多特征融合识别
6. 计算置信度,多重验证
7. 语音播报面额和方向
8. 记录交易历史
三、模块化代码实现
主程序文件:banknote_recognition_system.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
视障人士纸币识别系统
实时识别人民币面额,语音播报金额
版本:3.0.0
作者:无障碍智能助手
"""
import os
import sys
import time
import json
import pickle
import threading
import queue
from datetime import datetime
from typing import Dict, List, Tuple, Optional, Any, Union
from dataclasses import dataclass, asdict, field
from enum import Enum
import warnings
warnings.filterwarnings('ignore')
# 图像处理
try:
import cv2
import numpy as np
CV2_AVAILABLE = True
except ImportError:
CV2_AVAILABLE = False
print("错误: OpenCV未安装,请运行: pip install opencv-python")
sys.exit(1)
# 机器学习
try:
import sklearn
from sklearn import svm, ensemble, neighbors
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
SKLEARN_AVAILABLE = True
except ImportError:
SKLEARN_AVAILABLE = False
print("警告: scikit-learn未安装,部分功能受限")
# 音频处理
try:
import pyttsx3
TTS_AVAILABLE = True
except ImportError:
TTS_AVAILABLE = False
print("警告: pyttsx3未安装,语音功能受限")
try:
import simpleaudio as sa
import wave
AUDIO_AVAILABLE = True
except ImportError:
AUDIO_AVAILABLE = False
# 硬件接口
try:
import RPi.GPIO as GPIO
RASPBERRY_PI_AVAILABLE = True
except ImportError:
RASPBERRY_PI_AVAILABLE = False
# Display data per face value: (spoken Chinese name, dominant colour, (width_mm, height_mm)).
# The table lives outside the Enum body because a plain attribute assigned
# inside an Enum class would itself be turned into a member.
# Sizes follow the fifth-series RMB specification: widths step by 5 mm, heights
# are 63 mm (1/5 yuan), 70 mm (10/20/50 yuan) and 77 mm (100 yuan).
_DENOMINATION_INFO = {
    1: ("一元", "橄榄绿色", (130, 63)),
    5: ("五元", "紫色", (135, 63)),
    10: ("十元", "蓝黑色", (140, 70)),
    20: ("二十元", "棕色", (145, 70)),
    50: ("五十元", "绿色", (150, 70)),
    100: ("一百元", "红色", (155, 77)),
}


class BanknoteDenomination(Enum):
    """RMB banknote denominations; each member's value is its face value in yuan."""
    UNKNOWN = 0
    RMB_1 = 1      # 1 yuan
    RMB_5 = 5      # 5 yuan
    RMB_10 = 10    # 10 yuan
    RMB_20 = 20    # 20 yuan
    RMB_50 = 50    # 50 yuan
    RMB_100 = 100  # 100 yuan

    @property
    def chinese_name(self) -> str:
        """Chinese name used for speech output; "未知" for UNKNOWN."""
        info = _DENOMINATION_INFO.get(self.value)
        return info[0] if info else "未知"

    @property
    def color_description(self) -> str:
        """Dominant colour of the note (a hint for the user); "未知颜色" for UNKNOWN."""
        info = _DENOMINATION_INFO.get(self.value)
        return info[1] if info else "未知颜色"

    @property
    def size(self) -> Tuple[int, int]:
        """Note size as (width_mm, height_mm); (0, 0) for UNKNOWN."""
        info = _DENOMINATION_INFO.get(self.value)
        return info[2] if info else (0, 0)
@dataclass
class Banknote:
    """One recognised banknote together with the metadata of the recognition."""
    denomination: BanknoteDenomination
    confidence: float
    orientation: str  # facing side: front/back/unknown
    version: str  # series year: 1999/2005/2015/2019
    timestamp: float
    image_path: Optional[str] = None
    features: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict:
        """Return a plain-dict view of the note.

        The timestamp is rendered as an ISO-8601 string and the denomination
        is expanded into its value, Chinese name, colour and size. The
        ``features`` field is not part of the result.
        """
        face = self.denomination
        captured_at = datetime.fromtimestamp(self.timestamp)

        record = {}
        record['denomination'] = face.value
        record['denomination_name'] = face.chinese_name
        record['confidence'] = self.confidence
        record['orientation'] = self.orientation
        record['version'] = self.version
        record['timestamp'] = captured_at.isoformat()
        record['image_path'] = self.image_path
        record['color'] = face.color_description
        record['size'] = face.size
        return record
class ImagePreprocessor:
    """Prepare camera frames for feature extraction.

    Covers denoising and contrast enhancement, perspective correction of the
    note outline, and cropping of configured regions of interest.
    """

    def __init__(self, config: Dict):
        """Store the preprocessing configuration.

        Args:
            config: Preprocessing options. ``target_size`` (width, height) is
                read by :meth:`preprocess` and defaults to ``(600, 300)``.
        """
        self.config = config

    @staticmethod
    def _to_rgb(image: np.ndarray) -> np.ndarray:
        """Return a 3-channel RGB copy of a gray, BGR or BGRA frame."""
        if image.ndim == 2 or image.shape[2] == 1:
            return cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        if image.shape[2] == 4:
            return cv2.cvtColor(image, cv2.COLOR_BGRA2RGB)
        return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    @staticmethod
    def _to_gray(image: np.ndarray) -> np.ndarray:
        """Return a single-channel view/copy of a gray, BGR or BGRA frame."""
        if image.ndim == 2:
            return image
        if image.shape[2] == 1:
            return image[:, :, 0]
        if image.shape[2] == 4:
            return cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    def preprocess(self, image: np.ndarray) -> Optional[np.ndarray]:
        """Denoise, contrast-enhance and resize a frame.

        Args:
            image: Raw frame in OpenCV channel order (BGR); grayscale and
                BGRA frames are accepted and converted.

        Returns:
            The processed RGB image at ``target_size``, or None when the
            input is None or empty.
        """
        if image is None or image.size == 0:
            return None

        # Everything downstream works on 3-channel RGB; the LAB conversion
        # below rejects single-channel input.
        rgb = self._to_rgb(image)

        # 1. Denoise
        denoised = cv2.GaussianBlur(rgb, (3, 3), 0)

        # 2. Enhance contrast: CLAHE on the lightness channel only, so the
        #    colour channels are left as they are.
        lab = cv2.cvtColor(denoised, cv2.COLOR_RGB2LAB)
        lightness, a_chan, b_chan = cv2.split(lab)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        lightness = clahe.apply(lightness)
        enhanced = cv2.cvtColor(cv2.merge([lightness, a_chan, b_chan]), cv2.COLOR_LAB2RGB)

        # 3. Resize. A size read from JSON arrives as a list; cv2.resize
        #    needs a tuple of ints.
        target_size = tuple(int(v) for v in self.config.get('target_size', (600, 300)))
        return cv2.resize(enhanced, target_size, interpolation=cv2.INTER_AREA)

    def correct_perspective(
        self, image: np.ndarray
    ) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
        """Warp the largest four-sided contour in the frame to a flat rectangle.

        Args:
            image: Raw frame (BGR, BGRA or grayscale).

        Returns:
            ``(warped, matrix)`` on success. ``(image, None)`` when no usable
            quadrilateral is found, and ``(None, None)`` when ``image`` is None.
        """
        if image is None:
            return None, None
        if image.size == 0:
            return image, None

        gray = self._to_gray(image)

        # Edge detection, then a morphological close to bridge small gaps in
        # the outline.
        edges = cv2.Canny(gray, 50, 150)
        kernel = np.ones((5, 5), np.uint8)
        closed = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel)

        # findContours returns (contours, hierarchy) on OpenCV 4 and
        # (image, contours, hierarchy) on OpenCV 3; [-2] is the contour list
        # in both.
        contours = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
        if len(contours) == 0:
            return image, None

        # The largest contour is taken to be the banknote.
        largest_contour = max(contours, key=cv2.contourArea)

        # Approximate it by a polygon; only a quadrilateral can be rectified.
        epsilon = 0.02 * cv2.arcLength(largest_contour, True)
        approx = cv2.approxPolyDP(largest_contour, epsilon, True)
        if len(approx) != 4:
            return image, None

        # Corners ordered top-left, top-right, bottom-right, bottom-left.
        points = self._order_points(approx.reshape(4, 2))

        # Output size: the longer of each pair of opposite edges.
        width = max(np.linalg.norm(points[1] - points[0]),
                    np.linalg.norm(points[3] - points[2]))
        height = max(np.linalg.norm(points[2] - points[1]),
                     np.linalg.norm(points[3] - points[0]))
        out_w, out_h = int(width), int(height)
        if out_w < 1 or out_h < 1:
            # Degenerate quad: warpPerspective cannot produce an empty image.
            return image, None

        dst = np.array([
            [0, 0],
            [width - 1, 0],
            [width - 1, height - 1],
            [0, height - 1],
        ], dtype="float32")

        M = cv2.getPerspectiveTransform(points.astype("float32"), dst)
        warped = cv2.warpPerspective(image, M, (out_w, out_h))
        return warped, M

    def _order_points(self, pts: np.ndarray) -> np.ndarray:
        """Order four (x, y) corners as top-left, top-right, bottom-right, bottom-left.

        Uses the x+y sum (smallest at top-left, largest at bottom-right) and
        the y-x difference (smallest at top-right, largest at bottom-left).
        """
        rect = np.zeros((4, 2), dtype="float32")

        sums = pts.sum(axis=1)
        rect[0] = pts[np.argmin(sums)]   # top-left
        rect[2] = pts[np.argmax(sums)]   # bottom-right

        diffs = pts[:, 1] - pts[:, 0]
        rect[1] = pts[np.argmin(diffs)]  # top-right
        rect[3] = pts[np.argmax(diffs)]  # bottom-left
        return rect

    def extract_roi(self, image: np.ndarray, roi_config: Dict) -> Dict[str, np.ndarray]:
        """Crop the configured regions of interest out of an image.

        Only specs of the form ``{'relative': (x1, y1, x2, y2)}`` with
        fractions of the image width/height are handled; any other spec is
        skipped.

        Args:
            image: Input image.
            roi_config: Mapping of region name to region spec.

        Returns:
            Mapping of region name to the cropped array (a view into
            ``image``). Regions that come out empty are omitted.
        """
        rois = {}
        if image is None or image.size == 0:
            return rois

        height, width = image.shape[:2]
        for roi_name, roi_spec in roi_config.items():
            if not (isinstance(roi_spec, dict) and 'relative' in roi_spec):
                continue
            rel = roi_spec['relative']
            # Clamp to the image: a negative start would otherwise be read by
            # numpy as an index counted from the far edge and select the
            # wrong area.
            x1 = min(max(int(rel[0] * width), 0), width)
            y1 = min(max(int(rel[1] * height), 0), height)
            x2 = min(max(int(rel[2] * width), 0), width)
            y2 = min(max(int(rel[3] * height), 0), height)
            roi = image[y1:y2, x1:x2]
            if roi.size > 0:
                rois[roi_name] = roi
        return rois
class FeatureExtractor:
    """Compute colour, texture, shape and keypoint features from banknote images."""

    def __init__(self, config: Dict):
        """Store the feature-extraction configuration.

        Args:
            config: Feature options. ``glcm_levels`` (default 32) sets the
                number of gray levels used by the co-occurrence features.
        """
        self.config = config

    @staticmethod
    def _to_gray(image: np.ndarray) -> np.ndarray:
        """Return ``image`` as single-channel; 3-D input is treated as RGB."""
        if len(image.shape) == 3:
            return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        return image

    def extract_color_features(self, image: np.ndarray) -> Dict[str, float]:
        """Summarise the colour distribution of an RGB image in HSV space.

        Args:
            image: RGB image.

        Returns:
            Per-channel mean and standard deviation (``h_mean`` ... ``v_std``)
            plus ``dominant_hue``, the most populated of 180 hue bins. Empty
            dict when the image is None or empty.
        """
        features = {}
        if image is None or image.size == 0:
            return features

        hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
        planes = [hsv[:, :, idx] for idx in range(3)]

        for name, plane in zip('hsv', planes):
            features[f'{name}_mean'] = float(np.mean(plane))
        for name, plane in zip('hsv', planes):
            features[f'{name}_std'] = float(np.std(plane))

        # Dominant hue from the hue histogram.
        h_hist = cv2.calcHist([hsv], [0], None, [180], [0, 180])
        features['dominant_hue'] = float(np.argmax(h_hist))
        return features

    def extract_texture_features(self, image: np.ndarray) -> Dict[str, float]:
        """Compute LBP and gray-level co-occurrence texture features.

        Args:
            image: Grayscale image, or an RGB image that is converted first.

        Returns:
            Merged LBP and GLCM feature dict; empty when the image is None
            or empty.
        """
        features = {}
        if image is None or image.size == 0:
            return features

        gray = self._to_gray(image)
        features.update(self._compute_lbp(gray))
        features.update(self._compute_glcm(gray))
        return features

    def _compute_lbp(self, gray: np.ndarray) -> Dict[str, float]:
        """Compute 8-neighbour, radius-1 local-binary-pattern statistics.

        Each interior pixel is compared against 8 bilinearly interpolated
        samples on a circle around it; sample ``n`` sets bit ``7 - n`` of the
        pixel's code when its value is >= the centre value.

        Args:
            gray: 2-D grayscale image.

        Returns:
            ``lbp_hist_0`` .. ``lbp_hist_9`` (first ten bins of the normalised
            256-bin code histogram), ``lbp_mean`` and ``lbp_std``. All values
            are 0.0 when the image has no interior pixels (smaller than 3x3).
        """
        features = {}
        radius = 1
        n_points = 8 * radius
        height, width = gray.shape

        if height <= 2 * radius or width <= 2 * radius:
            # No pixel has a full neighbourhood (e.g. a sliver-sized ROI).
            for i in range(10):
                features[f'lbp_hist_{i}'] = 0.0
            features['lbp_mean'] = 0.0
            features['lbp_std'] = 0.0
            return features

        pixels = gray.astype(np.float64)
        center = pixels[radius:height - radius, radius:width - radius]
        # Row/column coordinates of the interior pixels. A sample's row offset
        # depends only on the row and its column offset only on the column, so
        # the interpolation weights are 1-D and broadcast over the image.
        rows = np.arange(radius, height - radius, dtype=np.float64)
        cols = np.arange(radius, width - radius, dtype=np.float64)

        codes = np.zeros(center.shape, dtype=np.int64)
        for n in range(n_points):
            angle = 2 * np.pi * n / n_points
            x = rows + radius * np.cos(angle)   # sample row coordinate
            y = cols - radius * np.sin(angle)   # sample column coordinate

            # Interior pixels are at least `radius` away from every border,
            # so floor/ceil of the sample coordinates always stay in bounds.
            x1 = np.floor(x).astype(np.intp)
            x2 = np.ceil(x).astype(np.intp)
            y1 = np.floor(y).astype(np.intp)
            y2 = np.ceil(y).astype(np.intp)

            fx = (x - x1)[:, np.newaxis]
            fy = (y - y1)[np.newaxis, :]

            # Bilinear interpolation of the sample value.
            val = ((1 - fx) * (1 - fy) * pixels[np.ix_(x1, y1)]
                   + fx * (1 - fy) * pixels[np.ix_(x2, y1)]
                   + (1 - fx) * fy * pixels[np.ix_(x1, y2)]
                   + fx * fy * pixels[np.ix_(x2, y2)])
            codes |= (val >= center).astype(np.int64) << (n_points - 1 - n)

        lbp = codes.astype(np.uint8)

        # Normalised 256-bin histogram of the codes.
        hist = np.bincount(lbp.ravel(), minlength=256).astype("float")
        hist /= (hist.sum() + 1e-7)

        # Only the first ten bins are exported as features.
        for i in range(10):
            features[f'lbp_hist_{i}'] = float(hist[i])
        features['lbp_mean'] = float(np.mean(lbp))
        features['lbp_std'] = float(np.std(lbp))
        return features

    @staticmethod
    def _cooccurrence(quantized: np.ndarray, d_row: int, d_col: int,
                      levels: int) -> Optional[np.ndarray]:
        """Return the symmetric, normalised co-occurrence matrix for one offset.

        Returns None when the image is too small to contain any pixel pair at
        the given offset.
        """
        height, width = quantized.shape
        r0, r1 = max(0, -d_row), min(height, height - d_row)
        c0, c1 = max(0, -d_col), min(width, width - d_col)
        if r1 <= r0 or c1 <= c0:
            return None

        ref = quantized[r0:r1, c0:c1]
        nbr = quantized[r0 + d_row:r1 + d_row, c0 + d_col:c1 + d_col]
        counts = np.bincount((ref * levels + nbr).ravel(), minlength=levels * levels)
        counts = counts.reshape(levels, levels).astype(np.float64)
        counts = counts + counts.T  # count each pair in both directions
        return counts / counts.sum()

    def _compute_glcm(self, gray: np.ndarray) -> Dict[str, float]:
        """Compute gray-level co-occurrence (GLCM) texture features.

        The image is quantised to ``glcm_levels`` gray levels (default 32,
        assuming an 8-bit 0..255 range; values outside it are clipped). For
        distance 1 and the angles 0, 45, 90 and 135 degrees a symmetric,
        normalised co-occurrence matrix is built with pixel offset
        ``(round(d*sin(a)), round(d*cos(a)))`` in (row, col), and three
        statistics are derived from it.

        Args:
            gray: 2-D grayscale image.

        Returns:
            ``glcm_d{d}_a{k}_asm`` (angular second moment),
            ``glcm_d{d}_a{k}_contrast`` and ``glcm_d{d}_a{k}_correlation`` for
            each angle index ``k``. Correlation is 1.0 for a constant image.
            All three are 0.0 when the image has no pixel pair at the offset.
        """
        features = {}
        distances = [1]
        angles = [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4]
        levels = max(2, int((self.config or {}).get('glcm_levels', 32)))

        quantized = None
        if gray is not None and gray.ndim == 2 and gray.size > 0:
            scaled = np.floor(gray.astype(np.float64) * levels / 256.0)
            quantized = np.clip(scaled, 0, levels - 1).astype(np.intp)

        level_idx = np.arange(levels, dtype=np.float64)
        diff_sq = (level_idx[:, np.newaxis] - level_idx[np.newaxis, :]) ** 2

        for d in distances:
            for a_idx, angle in enumerate(angles):
                asm = contrast = correlation = 0.0
                glcm = None
                if quantized is not None:
                    d_row = int(round(d * np.sin(angle)))
                    d_col = int(round(d * np.cos(angle)))
                    glcm = self._cooccurrence(quantized, d_row, d_col, levels)
                if glcm is not None:
                    asm = float(np.sum(glcm ** 2))
                    contrast = float(np.sum(glcm * diff_sq))
                    # The matrix is symmetric, so row and column marginals
                    # (and hence both means/variances) coincide.
                    marginal = glcm.sum(axis=1)
                    mean = float(np.dot(level_idx, marginal))
                    dev = level_idx - mean
                    var = float(np.dot(dev ** 2, marginal))
                    if var > 1e-12:
                        correlation = float(np.dot(dev, np.dot(glcm, dev)) / var)
                    else:
                        correlation = 1.0
                features[f'glcm_d{d}_a{a_idx}_asm'] = asm
                features[f'glcm_d{d}_a{a_idx}_contrast'] = contrast
                features[f'glcm_d{d}_a{a_idx}_correlation'] = correlation
        return features

    def extract_shape_features(self, image: np.ndarray) -> Dict[str, float]:
        """Describe the largest contour of the Otsu-thresholded image.

        Args:
            image: Grayscale image, or an RGB image that is converted first.

        Returns:
            ``contour_area``, ``contour_perimeter``, ``circularity``,
            ``aspect_ratio`` and ``rectangularity`` of the largest external
            contour. Empty dict when the image is None/empty or no contour
            is found.
        """
        features = {}
        if image is None or image.size == 0:
            return features

        gray = self._to_gray(image)
        _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        # findContours returns (contours, hierarchy) on OpenCV 4 and
        # (image, contours, hierarchy) on OpenCV 3; [-2] is the contour list
        # in both.
        contours = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
        if len(contours) == 0:
            return features

        largest_contour = max(contours, key=cv2.contourArea)

        area = cv2.contourArea(largest_contour)
        perimeter = cv2.arcLength(largest_contour, True)
        features['contour_area'] = float(area)
        features['contour_perimeter'] = float(perimeter)
        if perimeter > 0:
            features['circularity'] = float(4 * np.pi * area / (perimeter * perimeter))
        else:
            features['circularity'] = 0.0

        # Axis-aligned bounding rectangle.
        x, y, w, h = cv2.boundingRect(largest_contour)
        features['aspect_ratio'] = float(w) / h if h > 0 else 0.0
        features['rectangularity'] = float(area) / (w * h) if w * h > 0 else 0.0
        return features

    def extract_orb_features(self, image: np.ndarray) -> Optional[np.ndarray]:
        """Compute ORB descriptors (up to 500 keypoints).

        Args:
            image: Grayscale image, or an RGB image that is converted first.

        Returns:
            The descriptor array returned by ``detectAndCompute``, or None
            when the image is None or empty.
        """
        if image is None or image.size == 0:
            return None

        gray = self._to_gray(image)
        orb = cv2.ORB_create(nfeatures=500)
        _, descriptors = orb.detectAndCompute(gray, None)
        return descriptors

    def extract_all_features(self, image: np.ndarray, rois: Dict[str, np.ndarray]) -> Dict[str, Any]:
        """Extract global features plus per-ROI colour and texture features.

        Args:
            image: Full image.
            rois: Mapping of region name to region image.

        Returns:
            Dict with ``global_color``, ``global_texture`` and
            ``global_shape``, plus one ``{'color': ..., 'texture': ...}``
            entry per non-empty ROI, keyed by the ROI name.
        """
        features = {}

        features['global_color'] = self.extract_color_features(image)
        features['global_texture'] = self.extract_texture_features(image)
        features['global_shape'] = self.extract_shape_features(image)

        for roi_name, roi_image in rois.items():
            if roi_image is not None and roi_image.size > 0:
                roi_features = {}
                roi_features['color'] = self.extract_color_features(roi_image)
                roi_features['texture'] = self.extract_texture_features(roi_image)
                features[roi_name] = roi_features
        return features
class BanknoteClassifier:
"""纸币分类器"""
def __init__(self, config: Dict):
    """Create the classifier and make a model available immediately.

    Args:
        config: Classifier configuration.
    """
    self.config = config

    # Nothing is loaded yet; the call below fills these in.
    self.model = self.scaler = None
    self.feature_names, self.classes = [], []

    # Load a persisted model, or train one if that is not possible.
    self.load_or_train_model()
def load_or_train_model(self):
    """Load the persisted model, or train a new one when that is not possible.

    The file is read from ``config['model_path']`` (default
    ``models/banknote_classifier.pkl``). If it is missing, unreadable or
    lacks one of the expected entries, ``train_model`` is called instead.
    """
    model_path = self.config.get('model_path', 'models/banknote_classifier.pkl')

    if os.path.exists(model_path):
        try:
            # SECURITY: unpickling executes code embedded in the file. Only
            # point model_path at files this application wrote itself.
            with open(model_path, 'rb') as f:
                data = pickle.load(f)
            # Read every entry before touching self, so an incomplete file
            # cannot leave the instance half-populated (e.g. stale
            # feature_names surviving into the retrained model).
            loaded = tuple(
                data[key] for key in ('model', 'scaler', 'feature_names', 'classes')
            )
        except Exception as e:
            # Deliberate best-effort: any load failure falls back to training.
            print(f"加载模型失败: {e}")
        else:
            self.model, self.scaler, self.feature_names, self.classes = loaded
            print(f"模型已加载: {model_path}")
            return

    # No usable model file: train a fresh model.
    print("训练新模型...")
    self.train_model()
def train_model(self):
    """Train a random-forest classifier on simulated data and save it.

    No real dataset is wired in: features are random noise and labels are
    random denominations, so the resulting model is a placeholder. After
    training, ``self.scaler``, ``self.model`` and ``self.classes`` are set
    and ``save_model`` is called.
    """
    # A real deployment should load features from a banknote image dataset.
    print("注意: 使用模拟数据训练模型")
    print("在实际应用中,请使用真实纸币图像数据集")

    # Simulated data. Seeded so that, together with the fixed random_state of
    # the split and the forest below, a retrain is reproducible.
    n_samples = 100
    n_features = 20
    rng = np.random.RandomState(42)
    X = rng.randn(n_samples, n_features)
    # Labels: the six denominations.
    y = rng.choice([1, 5, 10, 20, 50, 100], size=n_samples)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Standardise features; the scaler is fitted on the training split only.
    self.scaler = StandardScaler()
    X_train_scaled = self.scaler.fit_transform(X_train)

    self.model = ensemble.RandomForestClassifier(
        n_estimators=100,
        max_depth=10,
        random_state=42
    )
    self.model.fit(X_train_scaled, y_train)
    # Record the label set the model was fitted on, so it is persisted with
    # the model instead of an empty list.
    self.classes = self.model.classes_.tolist()

    # Evaluate on the held-out split.
    X_test_scaled = self.scaler.transform(X_test)
    accuracy = self.model.score(X_test_scaled, y_test)
    print(f"模型训练完成,测试集准确率: {accuracy:.2f}")

    self.save_model()
def save_model(self):
    """Pickle the model, scaler, feature names, classes and config to disk.

    Writes to ``config['model_path']`` (default
    ``models/banknote_classifier.pkl``), the same location
    ``load_or_train_model`` reads from, creating the parent directory if
    needed. Saving anywhere else would make a custom path retrain on every
    start.
    """
    model_path = self.config.get('model_path', 'models/banknote_classifier.pkl')
    model_dir = os.path.dirname(model_path)
    if model_dir:
        # A bare file name has no directory component to create.
        os.makedirs(model_dir, exist_ok=True)

    data = {
        'model': self.model,
        'scaler': self.scaler,
        'feature_names': self.feature_names,
        'classes': self.classes,
        'config': self.config
    }
    with open(model_path, 'wb') as f:
        pickle.dump(data, f)
    print(f"模型已保存: {model_path}")
def extract_features_for_prediction(self, features: Dict[str, Any]) -> np.ndarray:
"""
为预测提取特征向量
Args:
features: 特征字典
Returns:
特征向量
"""
# 这里应该实现从特征字典到特征向量的转换
# 简化实现:创建固定长度的特征向量
feature_vector = []
# 添加全局颜色特征
if 'global_color' in features:
color_feats = features['global_color']
for key in ['h_mean', 's_mean', 'v_mean', 'h_std', 's_std', 'v_std', 'dominant_hue']:
if key in color_feats:
feature_vector.append(color_feats[key])
# 添加纹理特征
if 'global_texture' in features:
texture_feats = features['global_texture']
for i in range(10):
key = f'lbp_hist_{i}'
如果你觉得这个工具好用,欢迎关注我!
更多推荐
所有评论(0)