深度学习代码--深层卷积神经网络(DeepConvNet)
摘要:本文给出一个识别率在 99% 以上的高精度深层卷积神经网络(DeepConvNet)的完整实现,包括为了导入父目录文件而进行的路径设定、使用 ReLU 时推荐的权重初始值(wight_init_scales = np.sqrt(2.0 / pre_node_nums))以及各层的生成,并逐段解析代码。
# coding: utf-8
import sys, os
sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定
import pickle
import numpy as np
from collections import OrderedDict
from common.layers import *
class DeepConvNet:
    """Deep convolutional network for image classification.

    The original author describes this as a high-accuracy ConvNet with a
    recognition rate above 99%.

    Network structure::

        conv - relu - conv - relu - pool -
        conv - relu - conv - relu - pool -
        conv - relu - conv - relu - pool -
        affine - relu - dropout - affine - dropout - softmax

    The layer classes used here (``Convolution``, ``Relu``, ``Pooling``,
    ``Affine``, ``Dropout``, ``SoftmaxWithLoss``) are expected to be
    provided by the file's ``from common.layers import *``.

    Attributes:
        params: dict mapping 'W1'..'W8' / 'b1'..'b8' to the weight and bias
            arrays (W1-W6 convolution filters, W7-W8 affine weights).
        layers: list of layer objects in forward order (softmax excluded).
        last_layer: the ``SoftmaxWithLoss`` layer used by ``loss``.
    """

    # Positions inside ``self.layers`` of the layers that own weights (six
    # convolutions followed by two affine layers), in the order W1..W8.
    _PARAM_LAYER_INDICES = (0, 2, 5, 7, 10, 12, 15, 18)

    # NOTE: the dict defaults below are only ever read, never mutated, so
    # sharing them between instances is harmless.
    def __init__(self, input_dim=(1, 28, 28),
                 conv_param_1={'filter_num': 16, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_2={'filter_num': 16, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_3={'filter_num': 32, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_4={'filter_num': 32, 'filter_size': 3, 'pad': 2, 'stride': 1},
                 conv_param_5={'filter_num': 64, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_6={'filter_num': 64, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 hidden_size=50, output_size=10):
        """Create the parameters and the layer stack.

        Args:
            input_dim: input shape; only ``input_dim[0]`` (the channel
                count) is read here.
            conv_param_1 .. conv_param_6: one dict per convolution layer
                with the keys 'filter_num', 'filter_size', 'pad', 'stride'.
            hidden_size: number of units of the first affine layer.
            output_size: number of units of the final affine layer.
        """
        conv_params = [conv_param_1, conv_param_2, conv_param_3,
                       conv_param_4, conv_param_5, conv_param_6]

        # Weight initialisation ===========
        # Average number of previous-layer neurons each neuron is connected
        # to. These values (and the 64*4*4 affine input size below) are
        # hard-coded for the default configuration (TODO: compute
        # automatically).
        pre_node_nums = np.array([1*3*3, 16*3*3, 16*3*3, 32*3*3, 32*3*3,
                                  64*3*3, 64*4*4, hidden_size])
        # Recommended initial scale when ReLU is used.
        wight_init_scales = np.sqrt(2.0 / pre_node_nums)

        self.params = {}
        pre_channel_num = input_dim[0]
        for idx, conv_param in enumerate(conv_params):
            filter_num = conv_param['filter_num']
            filter_size = conv_param['filter_size']
            self.params['W' + str(idx + 1)] = wight_init_scales[idx] * np.random.randn(
                filter_num, pre_channel_num, filter_size, filter_size)
            self.params['b' + str(idx + 1)] = np.zeros(filter_num)
            # The next convolution consumes this layer's output channels.
            pre_channel_num = filter_num
        self.params['W7'] = wight_init_scales[6] * np.random.randn(64*4*4, hidden_size)
        self.params['b7'] = np.zeros(hidden_size)
        self.params['W8'] = wight_init_scales[7] * np.random.randn(hidden_size, output_size)
        self.params['b8'] = np.zeros(output_size)

        # Layer construction ===========
        # Three blocks of (conv - relu - conv - relu - pool); the resulting
        # positions of the weighted layers must match _PARAM_LAYER_INDICES.
        self.layers = []
        for idx, conv_param in enumerate(conv_params):
            self.layers.append(Convolution(self.params['W' + str(idx + 1)],
                                           self.params['b' + str(idx + 1)],
                                           conv_param['stride'], conv_param['pad']))
            self.layers.append(Relu())
            if idx % 2 == 1:
                # A pooling layer closes every pair of convolutions.
                self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Affine(self.params['W7'], self.params['b7']))
        self.layers.append(Relu())
        self.layers.append(Dropout(0.5))
        self.layers.append(Affine(self.params['W8'], self.params['b8']))
        self.layers.append(Dropout(0.5))

        self.last_layer = SoftmaxWithLoss()

    def predict(self, x, train_flg=False):
        """Run ``x`` through every layer in ``self.layers`` and return the result.

        ``train_flg`` is forwarded only to ``Dropout`` layers; all other
        layers are called with ``x`` alone.
        """
        for layer in self.layers:
            if isinstance(layer, Dropout):
                x = layer.forward(x, train_flg)
            else:
                x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Return the loss of ``x`` against targets ``t``.

        The forward pass is run with ``train_flg=True`` and its output is
        passed to ``self.last_layer.forward`` together with ``t``.
        """
        y = self.predict(x, train_flg=True)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t, batch_size=100):
        """Return the fraction of samples in ``x`` whose prediction matches ``t``.

        Args:
            x: input array; samples are sliced along the first axis.
            t: labels, either a 1-D array of class indices or a 2-D array
                that is reduced with ``argmax`` along axis 1.
            batch_size: number of samples evaluated per forward pass.

        Every sample is evaluated, including a final batch that is smaller
        than ``batch_size``.
        """
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        acc = 0.0
        # Step by batch_size so the trailing partial batch is scored too;
        # counting only full batches would ignore those samples while still
        # dividing by the total sample count.
        for start in range(0, x.shape[0], batch_size):
            tx = x[start:start + batch_size]
            tt = t[start:start + batch_size]
            y = self.predict(tx, train_flg=False)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt)

        return acc / x.shape[0]

    def gradient(self, x, t):
        """Return the gradients of all weights and biases.

        Runs a forward pass through ``loss`` and then calls ``backward`` on
        ``last_layer`` and on every layer in reverse order.

        Returns:
            dict mapping 'W1'..'W8' / 'b1'..'b8' to the ``dW`` / ``db``
            attributes of the corresponding layers.
        """
        # forward (the return value is not needed here; presumably the
        # layers keep what backward needs — confirm in common.layers)
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)

        tmp_layers = self.layers.copy()
        tmp_layers.reverse()
        for layer in tmp_layers:
            dout = layer.backward(dout)

        # collect the gradients
        grads = {}
        for i, layer_idx in enumerate(self._PARAM_LAYER_INDICES):
            grads['W' + str(i + 1)] = self.layers[layer_idx].dW
            grads['b' + str(i + 1)] = self.layers[layer_idx].db

        return grads

    def save_params(self, file_name="params.pkl"):
        """Pickle a shallow copy of ``self.params`` to ``file_name``."""
        params = dict(self.params)
        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_name="params.pkl"):
        """Load parameters from ``file_name`` and attach them to the layers.

        The loaded entries overwrite ``self.params`` and the ``W`` / ``b``
        attributes of the weighted layers are rebound to them.
        """
        # SECURITY: pickle.load can execute arbitrary code while
        # unpickling; only load parameter files from a trusted source.
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        for key, val in params.items():
            self.params[key] = val

        for i, layer_idx in enumerate(self._PARAM_LAYER_INDICES):
            self.layers[layer_idx].W = self.params['W' + str(i + 1)]
            self.layers[layer_idx].b = self.params['b' + str(i + 1)]
这段代码是一个深层卷积神经网络(DeepConvNet),比我之前的 SimpleConvNet 复杂得多,设计目标是让 MNIST 识别率达到 99% 以上。我来详细逐段分析,帮你彻底搞懂:网络结构、前向/反向传播、权重初始化和 Dropout 的使用。
🧠 整体功能
这个类实现了一个深层 CNN,结构如下:
conv - relu - conv - relu - pool - conv - relu - conv - relu - pool - conv - relu - conv - relu - pool - affine - relu - dropout - affine - dropout - softmax
✅ 总结:
- 6 层卷积(分 3 个 Block,每个 Block 后接池化)
- 2 层全连接(加上 Dropout 防止过拟合)
- 输出层是 softmax 分类器
📜 逐段详细分析
1️⃣ 初始化权重
pre_node_nums = np.array([1*3*3, 16*3*3, 16*3*3, 32*3*3, 32*3*3, 64*3*3, 64*4*4, hidden_size])
wight_init_scales = np.sqrt(2.0 / pre_node_nums)  # He初始化
✅ 说明
- pre_node_nums: 记录每层卷积/全连接层的“前一层输入节点数”
- wight_init_scales: 使用 He 初始化(适合 ReLU 激活函数),保证初始权重不会太大或太小,避免梯度爆炸/消失。
2️⃣ 卷积层权重
for idx, conv_param in enumerate([...]):
    self.params['W' + str(idx+1)] = wight_init_scales[idx] * np.random.randn(...)
    self.params['b' + str(idx+1)] = np.zeros(conv_param['filter_num'])
✅ 每个卷积层:
- W: shape = (filter_num, in_channels, filter_size, filter_size)
- b: shape = (filter_num, )
⚡ 权重先按标准正态分布随机生成,再乘以 He 初始化的缩放系数 sqrt(2 / 前一层节点数);偏置初始化为 0。
3️⃣ 全连接层权重
self.params['W7'] = wight_init_scales[6] * np.random.randn(64*4*4, hidden_size)
self.params['b7'] = np.zeros(hidden_size)
self.params['W8'] = wight_init_scales[7] * np.random.randn(hidden_size, output_size)
self.params['b8'] = np.zeros(output_size)
✅ 说明:
- W7: 连接最后一个池化层的输出(64 通道,空间尺寸 4x4,展平后为 64*4*4)到隐藏层
- W8: 连接隐藏层到输出层(10 类别)
4️⃣ 构建网络层
self.layers = []
# conv - relu - conv - relu - pool
self.layers.append(Convolution(...))
self.layers.append(Relu())
...
self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
✅ Block 重复 3 次:
- 两个卷积 + ReLU
- 池化层 (2x2)
✅ 全连接层后加:
- Dropout(0.5): 训练时随机丢弃 50% 神经元,防止过拟合
5️⃣ 前向传播
def predict(self, x, train_flg=False):
    for layer in self.layers:
        if isinstance(layer, Dropout):
            x = layer.forward(x, train_flg)
        else:
            x = layer.forward(x)
    return x
✅ 支持区分:
- 训练模式:train_flg=True,Dropout 启用
- 测试模式:train_flg=False,Dropout 关闭
6️⃣ 损失函数
def loss(self, x, t):
    y = self.predict(x, train_flg=True)
    return self.last_layer.forward(y, t)
✅ 损失层:
- SoftmaxWithLoss: softmax + 交叉熵损失
7️⃣ 计算精度
def accuracy(self, x, t, batch_size=100):
    ...
    y = self.predict(tx, train_flg=False)
✅ 测试时关闭 Dropout(train_flg=False)
8️⃣ 反向传播
tmp_layers = self.layers.copy()
tmp_layers.reverse()
for layer in tmp_layers:
    dout = layer.backward(dout)
✅ 倒序调用每一层的 backward 方法,计算各层的梯度(gradient 方法只把梯度收集到 grads 字典并返回,本身并不更新参数)。
9️⃣ 参数保存/加载
def save_params(self, file_name="params.pkl")
def load_params(self, file_name="params.pkl")
✅ 保存权重到 pkl 文件,方便下次加载继续训练。
🔥 亮点
✅ 使用 深层结构:多个卷积块提取丰富特征
✅ He 初始化:适配 ReLU,避免梯度消失/爆炸
✅ Dropout:防止过拟合
✅ 分离训练/测试逻辑:train_flg 控制 Dropout 行为
更多推荐
所有评论(0)