目录
1 研究背景 …… 1
2 卷积神经网络 …… 3
2.1 卷积神经网络模型 …… 3
2.2 局部感知域 …… 4
2.3 权值共享 …… 4
2.4 池化 …… 5
3 人脸识别系统 …… 6
3.1 图像采集 …… 6
3.2 人脸检测 …… 7
3.3 数据整理 …… 7
3.4 卷积神经网络的构建和训练 …… 8
3.5 人脸实时识别 …… 8
4 实验验证与结果分析 …… 8
4.1 数据来源 …… 8
4.2 代码实现 …… 8
4.3 实验结果分析 …… 9
5 结语 …… 9
参考文献 …… 9

1 研究背景
随着计算机视觉的相关理论与应用研究的快速发展,
人脸与指纹、虹膜等一样,是生物识别的一个重要特征,在档案管理系统、安全验证系统、公安系统的罪犯追踪、视频监控等方面有着很广阔的应用前景。虽然相对于指纹识别、虹膜识别技术来说,人脸特征的唯一性要差一些,但是其采集手段十分简单、方便、隐蔽,使用者也不会因为隐私等问题而产生抗拒心理。人脸识别技术具有以下优点:
(4)简易性:人脸识别系统利用摄像设备采集人脸信息进行识别,对摄像设备的性能没有特殊要求,包括手机、摄像头在内的常见摄像设备都可使用,而且不需要其他辅助设备就能完成识别。此外,摄像设备可安置在高处或者不易被人察觉的地方,避免被人恶意破坏。
import numpy as np
import tensorflow as tf
import cv2
import os
import nn
import time
# function [boundingbox] = bbreg(boundingbox,reg)
def bbreg(boundingbox, reg):
    """Calibrate bounding boxes with regression offsets.

    Each of the first four columns of ``boundingbox`` is shifted by the
    matching column of ``reg`` scaled by the box width (columns 0 and 2) or
    the box height (columns 1 and 3). Width and height are measured with a
    ``+ 1``.

    boundingbox: 2-D array whose columns 0..3 are the box edges; it is
        updated in place and also returned.
    reg: per-box offsets; when ``reg.shape[1] == 1`` it is first reshaped to
        ``(reg.shape[2], reg.shape[3])``.
    """
    if reg.shape[1] == 1:
        reg = np.reshape(reg, (reg.shape[2], reg.shape[3]))

    box_w = boundingbox[:, 2] - boundingbox[:, 0] + 1
    box_h = boundingbox[:, 3] - boundingbox[:, 1] + 1

    # One scale factor per edge: x-edges use the width, y-edges the height.
    extent = np.stack([box_w, box_h, box_w, box_h], axis=1)
    boundingbox[:, 0:4] = boundingbox[:, 0:4] + reg[:, 0:4] * extent
    return boundingbox
def generateBoundingBox(imap, reg, scale, t):
    """Use a score heatmap to generate candidate bounding boxes.

    imap: 2-D score map; every cell with ``imap >= t`` yields one box.
    reg: 3-D array whose last axis holds the four regression offsets
        (dx1, dy1, dx2, dy2) for the corresponding heatmap cell.
    scale: scale of the resized image the heatmap was computed on; box
        corners are divided by it.
    t: score threshold.

    Returns ``(boundingbox, reg)`` where ``boundingbox`` has 9 columns
    (two corner pairs, score, four offsets) and ``reg`` holds the four
    offsets of the selected cells. Both have zero rows when nothing passes
    the threshold.
    """
    stride = 2
    cellsize = 12

    # Both maps are transposed so that the first index returned by
    # np.where ends up in column 0 of the boxes.
    imap = np.transpose(imap)
    dx1 = np.transpose(reg[:, :, 0])
    dy1 = np.transpose(reg[:, :, 1])
    dx2 = np.transpose(reg[:, :, 2])
    dy2 = np.transpose(reg[:, :, 3])

    y, x = np.where(imap >= t)
    # The offsets are read at exactly the cells that passed the threshold.
    # (An earlier version flipped the offset maps upside down when a single
    # cell passed, which read the offsets of a mirrored cell.)
    score = imap[y, x]
    # With no hits this is already an empty (0, 4) array, so the result
    # keeps its 9 columns without any special case.
    reg = np.transpose(np.vstack([dx1[y, x], dy1[y, x], dx2[y, x], dy2[y, x]]))

    bb = np.transpose(np.vstack([y, x]))
    q1 = np.fix((stride * bb + 1) / scale)
    q2 = np.fix((stride * bb + cellsize - 1 + 1) / scale)
    boundingbox = np.hstack([q1, q2, np.expand_dims(score, 1), reg])
    return boundingbox, reg
# function pick = nms(boxes,threshold,type)
def nms(boxes, threshold, method):
    """Greedy non-maximum suppression.

    boxes: 2-D array; columns 0..3 are x1, y1, x2, y2 and column 4 is the
        score.
    threshold: a box is dropped when its overlap with an already kept,
        higher-scoring box exceeds this value.
    method: ``'Min'`` measures overlap as intersection / smaller area; any
        other value (the callers pass ``'Union'``) uses
        intersection / union.

    Returns the indices of the kept boxes, highest score first. For empty
    input an empty ``(0, 3)`` array is returned (kept for backward
    compatibility; callers only inspect its ``size``).
    """
    if boxes.size == 0:
        return np.empty((0, 3))

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    s = boxes[:, 4]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)

    order = np.argsort(s)  # ascending, so the best remaining box is last
    # intp rather than int16: indices above 32767 must not overflow.
    pick = np.zeros(s.shape[0], dtype=np.intp)
    counter = 0
    while order.size > 0:
        i = order[-1]
        pick[counter] = i
        counter += 1

        idx = order[:-1]
        xx1 = np.maximum(x1[i], x1[idx])
        yy1 = np.maximum(y1[i], y1[idx])
        xx2 = np.minimum(x2[i], x2[idx])
        yy2 = np.minimum(y2[i], y2[idx])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h

        # Compare the string by value; identity comparison against a
        # literal is not reliable.
        if method == 'Min':
            o = inter / np.minimum(area[i], area[idx])
        else:
            o = inter / (area[i] + area[idx] - inter)

        order = idx[o <= threshold]

    return pick[0:counter]
# function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h)
def pad(total_boxes, w, h):
    """Compute the padding coordinates (pad the bounding boxes to square).

    For every box this yields 1-based, inclusive source coordinates clipped
    to ``[1, w]`` x ``[1, h]`` together with the matching destination
    coordinates inside a ``tmph`` x ``tmpw`` patch.

    total_boxes: 2-D array; columns 0..3 are x1, y1, x2, y2.
    w, h: upper bounds used for clipping x and y.

    Returns ``(dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph)``, all int32
    arrays with one entry per box.
    """
    box_w = (total_boxes[:, 2] - total_boxes[:, 0] + 1).astype(np.int32)
    box_h = (total_boxes[:, 3] - total_boxes[:, 1] + 1).astype(np.int32)
    count = total_boxes.shape[0]

    # Source window in the image (truncated to integers).
    left = total_boxes[:, 0].astype(np.int32)
    top = total_boxes[:, 1].astype(np.int32)
    right = total_boxes[:, 2].astype(np.int32)
    bottom = total_boxes[:, 3].astype(np.int32)

    # Destination window in the patch; starts out covering the whole patch.
    dst_left = np.ones(count, dtype=np.int32)
    dst_top = np.ones(count, dtype=np.int32)
    dst_right = box_w.copy()
    dst_bottom = box_h.copy()

    past_right = right > w
    dst_right[past_right] = -right[past_right] + w + box_w[past_right]
    right[past_right] = w

    past_bottom = bottom > h
    dst_bottom[past_bottom] = -bottom[past_bottom] + h + box_h[past_bottom]
    bottom[past_bottom] = h

    past_left = left < 1
    dst_left[past_left] = 2 - left[past_left]
    left[past_left] = 1

    past_top = top < 1
    dst_top[past_top] = 2 - top[past_top]
    top[past_top] = 1

    return (dst_top, dst_bottom, dst_left, dst_right,
            top, bottom, left, right, box_w, box_h)
# function [bboxA] = rerec(bboxA)
def rerec(bboxA):
    """Convert bboxA to square.

    Each box is replaced by a square whose side is the larger of the box's
    width and height and whose centre is the centre of the original box.
    Columns 0..3 of ``bboxA`` are overwritten in place; the array is also
    returned.
    """
    width = bboxA[:, 2] - bboxA[:, 0]
    height = bboxA[:, 3] - bboxA[:, 1]
    side = np.maximum(width, height)

    # Move the top-left corner so the centre stays where it was ...
    bboxA[:, 0] = bboxA[:, 0] + width * 0.5 - side * 0.5
    bboxA[:, 1] = bboxA[:, 1] + height * 0.5 - side * 0.5
    # ... then place the bottom-right corner one side length away.
    bboxA[:, 2:4] = bboxA[:, 0:2] + side[:, np.newaxis]
    return bboxA
def create_mtcnn(sess, model_path):
    """Build the three MTCNN networks and return callables that run them.

    The P-, R- and O-Net graphs are created under the variable scopes
    ``pnet``, ``rnet`` and ``onet`` and their weights are loaded from
    ``det1.npy``, ``det2.npy`` and ``det3.npy`` inside ``model_path``.

    sess: session the weights are loaded into and the networks are run with.
    model_path: directory containing the three ``.npy`` weight files.

    Returns ``(pnet_fun, rnet_fun, onet_fun)``; each takes an image batch,
    feeds it to the matching ``input`` placeholder and returns the values
    of that network's output tensors.
    """
    with tf.variable_scope('pnet'):
        # Fully unconstrained spatial size: (None, None, None, 3).
        pnet_input = tf.placeholder(tf.float32, (None, None, None, 3), 'input')
        pnet = nn.PNet({'data': pnet_input})
        pnet.load(os.path.join(model_path, 'det1.npy'), sess)

    with tf.variable_scope('rnet'):
        rnet_input = tf.placeholder(tf.float32, (None, 24, 24, 3), 'input')
        rnet = nn.RNet({'data': rnet_input})
        rnet.load(os.path.join(model_path, 'det2.npy'), sess)

    with tf.variable_scope('onet'):
        onet_input = tf.placeholder(tf.float32, (None, 48, 48, 3), 'input')
        onet = nn.ONet({'data': onet_input})
        onet.load(os.path.join(model_path, 'det3.npy'), sess)

    def pnet_fun(img):
        return sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'),
                        feed_dict={'pnet/input:0': img})

    def rnet_fun(img):
        return sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'),
                        feed_dict={'rnet/input:0': img})

    def onet_fun(img):
        return sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'),
                        feed_dict={'onet/input:0': img})

    return pnet_fun, rnet_fun, onet_fun
def _crop_patches(img, total_boxes, size):
    """Cut every box out of img, zero-padded where it leaves the image, resized to size x size.

    Returns an array of shape ``(size, size, 3, numbox)``, or ``None`` when
    a box has a non-positive width or height and cannot be resized.
    """
    h = img.shape[0]
    w = img.shape[1]
    dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
    numbox = total_boxes.shape[0]
    patches = np.zeros((size, size, 3, numbox))
    for k in range(numbox):
        if tmph[k] <= 0 or tmpw[k] <= 0:
            return None
        tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
        # pad() returns 1-based inclusive coordinates, hence the "- 1".
        tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = img[y[k] - 1:ey[k], x[k] - 1:ex[k], :]
        patches[:, :, :, k] = cv2.resize(tmp, (size, size), interpolation=cv2.INTER_AREA)
    return patches


def detect_face(img, minsize, pnet, rnet, onet, threshold, factor):
    """Detect faces in an image and return their bounding boxes.

    img: input image as an ``(h, w, 3)`` array.
    minsize: minimum faces' size.
    pnet, rnet, onet: callables running the three networks (see
        ``create_mtcnn``).
    threshold: ``[th1, th2, th3]``, the score thresholds of the three stages.
    factor: the factor used to create a scaling pyramid of face sizes to
        detect in the image; must be smaller than 1.

    Returns an array with one row per face and five columns
    (x1, y1, x2, y2, score). When no face is found, or a candidate box is
    degenerate, the array has zero rows. The second output of ``onet`` is
    not used.

    Raises ValueError if ``factor >= 1`` (the pyramid would never end).
    """
    if factor >= 1:
        raise ValueError('factor must be smaller than 1')

    # NOTE(review): the initialisation below (total_boxes with 9 columns,
    # m = 12 / minsize, the per-scale hs/ws) was missing from this function
    # and has been reconstructed to match how the names are used further
    # down and the 12-pixel cell size in generateBoundingBox -- confirm.
    factor_count = 0
    total_boxes = np.empty((0, 9))
    h = img.shape[0]
    w = img.shape[1]
    minl = np.amin([h, w])
    m = 12.0 / minsize
    minl = minl * m

    # create scale pyramid
    scales = []
    while minl >= 12:
        scales += [m * np.power(factor, factor_count)]
        minl = minl * factor
        factor_count += 1

    # first stage
    for scale in scales:
        hs = int(np.ceil(h * scale))
        ws = int(np.ceil(w * scale))
        # cv2.resize takes the target size as (width, height).
        im_data = cv2.resize(img, (ws, hs), interpolation=cv2.INTER_AREA)
        im_data = (im_data - 127.5) * 0.0078125
        img_x = np.expand_dims(im_data, 0)
        img_y = np.transpose(img_x, (0, 2, 1, 3))
        out = pnet(img_y)
        out0 = np.transpose(out[0], (0, 2, 1, 3))
        out1 = np.transpose(out[1], (0, 2, 1, 3))

        boxes, _ = generateBoundingBox(out1[0, :, :, 1].copy(), out0[0, :, :, :].copy(),
                                       scale, threshold[0])

        # inter-scale nms
        pick = nms(boxes.copy(), 0.5, 'Union')
        if boxes.size > 0 and pick.size > 0:
            boxes = boxes[pick, :]
            total_boxes = np.append(total_boxes, boxes, axis=0)

    if total_boxes.shape[0] == 0:
        return np.empty((0, 5))

    pick = nms(total_boxes.copy(), 0.7, 'Union')
    total_boxes = total_boxes[pick, :]
    regw = total_boxes[:, 2] - total_boxes[:, 0]
    regh = total_boxes[:, 3] - total_boxes[:, 1]
    qq1 = total_boxes[:, 0] + total_boxes[:, 5] * regw
    qq2 = total_boxes[:, 1] + total_boxes[:, 6] * regh
    qq3 = total_boxes[:, 2] + total_boxes[:, 7] * regw
    qq4 = total_boxes[:, 3] + total_boxes[:, 8] * regh
    total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:, 4]]))
    total_boxes = rerec(total_boxes.copy())
    total_boxes[:, 0:4] = np.fix(total_boxes[:, 0:4]).astype(np.int32)

    # second stage
    tempimg = _crop_patches(img, total_boxes, 24)
    if tempimg is None:
        return np.empty((0, 5))
    tempimg = (tempimg - 127.5) * 0.0078125
    tempimg1 = np.transpose(tempimg, (3, 1, 0, 2))
    out = rnet(tempimg1)
    out0 = np.transpose(out[0])
    out1 = np.transpose(out[1])
    score = out1[1, :]
    ipass = np.where(score > threshold[1])
    total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(),
                             np.expand_dims(score[ipass].copy(), 1)])
    mv = out0[:, ipass[0]]
    if total_boxes.shape[0] == 0:
        return total_boxes

    pick = nms(total_boxes, 0.7, 'Union')
    total_boxes = total_boxes[pick, :]
    total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:, pick]))
    total_boxes = rerec(total_boxes.copy())

    # third stage
    total_boxes = np.fix(total_boxes).astype(np.int32)
    tempimg = _crop_patches(img, total_boxes, 48)
    if tempimg is None:
        return np.empty((0, 5))
    tempimg = (tempimg - 127.5) * 0.0078125
    tempimg1 = np.transpose(tempimg, (3, 1, 0, 2))
    out = onet(tempimg1)
    out0 = np.transpose(out[0])
    out2 = np.transpose(out[2])
    score = out2[1, :]
    ipass = np.where(score > threshold[2])
    total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(),
                             np.expand_dims(score[ipass].copy(), 1)])
    mv = out0[:, ipass[0]]
    if total_boxes.shape[0] > 0:
        total_boxes = bbreg(total_boxes.copy(), np.transpose(mv))
        pick = nms(total_boxes.copy(), threshold[2], 'Min')
        total_boxes = total_boxes[pick, :]
    return total_boxes
# This method is kept for debugging purpose
# h=img.shape[0]
# w=img.shape[1]
# hs, ws = sz
# dx = float(w) / ws
# dy = float(h) / hs
# im_data = np.zeros((hs,ws,3))
# for a1 in range(0,hs):
# for a2 in range(0,ws):
# for a3 in range(0,3):
# im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3]
# return im_data
# Detector settings handed to detect_face() by the code below.
minsize = 20  # minimum size of face
factor = 0.709  # scale factor of the face image pyramid (image shrink ratio per level)
thresh = [0.6, 0.7, 0.7]  # thresholds of the three detection stages
margin = 44  # not referenced in the visible part of this file
def detection(img):
    """Detect the face in a BGR image and return it as a 32x32 gray image.

    input: image (BGR, as read by OpenCV); it is resized to 400x400 before
        detection.
    output: image of gray -- the highest-scoring face, cropped, resized to
        32x32 and converted to grayscale; ``None`` when no usable face was
        found.

    All detected faces are outlined and labelled with their score in the
    ``'face'`` window; the returned crop is shown in the ``'img_gray'``
    window. The caller has to run ``cv2.waitKey`` for the windows to
    refresh.
    """
    mtcnn_model_path = 'mtcnn_model/'
    print('Creating networks and loading parameters')
    # NOTE(review): the networks are rebuilt and the weights reloaded on
    # every call; the session is closed on exit so repeated calls do not
    # accumulate open sessions.
    with tf.Graph().as_default(), \
            tf.Session(config=tf.ConfigProto(log_device_placement=False)) as sess:
        pnet, rnet, onet = create_mtcnn(sess, mtcnn_model_path)
        t_start = time.time()
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (400, 400))
        bounding_boxes = detect_face(img, minsize, pnet, rnet, onet, thresh, factor)

    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    img_gray = None
    if len(bounding_boxes) > 0:
        for face in range(len(bounding_boxes)):
            startX, startY, endX, endY = (int(v) for v in bounding_boxes[face, 0:4])
            y = startY - 10 if startY - 10 > 10 else startY + 10
            cv2.rectangle(img, (startX, startY), (endX, endY), (0, 0, 255), 2)  # mark the face region with a rectangle
            cv2.putText(img, "{:.2f}%".format(bounding_boxes[face, 4] * 100),
                        (startX, y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
        cv2.namedWindow('face', 0)
        cv2.imshow('face', img)

        # Crop a single face (the best-scoring one) from the full frame.
        # Cropping inside the loop used to overwrite the frame, so any
        # second face was drawn on and cut from the previous 32x32 crop.
        best = int(np.argmax(bounding_boxes[:, 4]))
        startX, startY, endX, endY = (int(v) for v in bounding_boxes[best, 0:4])
        height, width = img.shape[0], img.shape[1]
        # Clamp to the frame: negative indices would slice from the far end.
        startX, startY = max(startX, 0), max(startY, 0)
        endX, endY = min(endX, width), min(endY, height)
        # NOTE(review): as before, the crop is taken from the annotated
        # frame, so the drawn outline can be part of the crop.
        face_img = img[startY:endY, startX:endX]
        if face_img.size > 0:
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            face_img = cv2.resize(face_img, (32, 32))
            img_gray = cv2.cvtColor(face_img, cv2.COLOR_RGB2GRAY)
            cv2.namedWindow('img_gray', 0)
            cv2.imshow('img_gray', img_gray)

    t_end = time.time()
    print("MTCNN's run time: ", round((t_end-t_start)*1000,4),"ms")
    return img_gray
if __name__ == '__main__':
    # Batch preprocessing: for every image under faceImages_path/<person>/,
    # detect the face, crop it, convert it to a 32x32 gray image and write it
    # to faceImagesGray_path/<person>/ under the same file name.
    mtcnn_model_path = 'mtcnn_model/'
    faceImages_path = 'E:/faceImages'  # directory with the raw face images
    faceImagesGray_path = 'E:/faceImageGray'  # directory for the detected, gray face crops
    print('Creating networks and loading parameters')
    # The session is closed when the block exits.
    with tf.Graph().as_default(), \
            tf.Session(config=tf.ConfigProto(log_device_placement=False)) as sess:
        pnet, rnet, onet = create_mtcnn(sess, mtcnn_model_path)
        for file in os.listdir(faceImages_path):
            src_dir = faceImages_path + '/' + file
            if not os.path.isdir(src_dir):
                continue
            dst_dir = faceImagesGray_path + '/' + file
            # cv2.imwrite does not create missing directories.
            os.makedirs(dst_dir, exist_ok=True)
            for j, faceImage in enumerate(os.listdir(src_dir)):
                t_start = time.time()
                img = cv2.imread(src_dir + '/' + faceImage)
                if img is None:
                    # cv2.imread returns None for files it cannot decode.
                    print('skipping unreadable image:', src_dir + '/' + faceImage)
                    continue
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img = cv2.resize(img, (400, 400))
                bounding_boxes = detect_face(img, minsize, pnet, rnet, onet, thresh, factor)
                img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
                if len(bounding_boxes) > 0:
                    for face in range(len(bounding_boxes)):
                        startX, startY, endX, endY = (int(v) for v in bounding_boxes[face, 0:4])
                        y = startY - 10 if startY - 10 > 10 else startY + 10
                        cv2.rectangle(img, (startX, startY), (endX, endY), (0, 0, 255), 2)  # mark the face region with a rectangle
                        cv2.putText(img, "{:.2f}%".format(bounding_boxes[face, 4] * 100),
                                    (startX, y),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
                    # cv2.namedWindow('face', 0)
                    # cv2.imshow('face', img)
                    # cv2.waitKey(1)

                    # One output file per input image: keep the best-scoring
                    # face and crop it from the full frame. Cropping inside
                    # the loop used to overwrite the frame, so any second
                    # face was cut from the previous 32x32 crop.
                    best = int(np.argmax(bounding_boxes[:, 4]))
                    startX, startY, endX, endY = (int(v) for v in bounding_boxes[best, 0:4])
                    height, width = img.shape[0], img.shape[1]
                    # Clamp to the frame: negative indices would slice from the far end.
                    startX, startY = max(startX, 0), max(startY, 0)
                    endX, endY = min(endX, width), min(endY, height)
                    # NOTE(review): as before, the crop is taken from the
                    # annotated frame, so the drawn outline can be part of it.
                    face_img = img[startY:endY, startX:endX]
                    if face_img.size > 0:
                        face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
                        face_img = cv2.resize(face_img, (32, 32))
                        img_gray = cv2.cvtColor(face_img, cv2.COLOR_RGB2GRAY)
                        # save the result as an image file
                        if not cv2.imwrite(dst_dir + '/' + faceImage, img_gray):
                            print('could not write:', dst_dir + '/' + faceImage)
                t_end = time.time()
                print("run time_",j,":", round((t_end-t_start)*1000,4),"ms")