Python实现简单神经网络
使用Python + NumPy库,实现神经网络。该网络具有一层输入层,一层隐藏层,一层输出层。求梯度采用的方法是数值微分求梯度。由于该算法效率较差,所以只训练了10次。最终的正确率停留在9%左右。(唯一的感觉就是数值微分确实好理解,但是太慢了!) from dataset.mnist import load_mnist import numpy as np batch_...
·
使用Python + NumPy库,实现神经网络。
该网络具有一层输入层,一层隐藏层,一层输出层。
求梯度采用的方法是数值微分求梯度。
由于该算法效率较差,所以只训练了10次。
最终的正确率停留在9%左右。
(唯一的感觉就是数值微分确实好理解,但是太慢了!)
from dataset.mnist import load_mnist
import numpy as np
# Number of samples drawn per training step (passed to next_batch).
batch_size = 100
# Factor by which NN.train scales each gradient before updating a parameter.
learning_rate = 0.01
# Number of units in the hidden layer (passed to the NN constructor).
hidden_size = 10
def sigmoid(x):
    """Hidden-layer activation: the logistic function 1 / (1 + e^-x).

    Applied element-wise when ``x`` is a NumPy array.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator
def softmax(x):
    """Output-layer activation: exp(x_i) / sum_j exp(x_j).

    The normalisation runs along the last axis, so a 1-D input is treated as
    one sample and a 2-D ``(batch, classes)`` input yields one probability
    distribution per row.  (Reducing over the whole array would make every
    row of a batch share a single normaliser, so rows would not sum to 1.)

    Args:
        x: Array-like of scores.

    Returns:
        Array of the same shape as ``x`` whose entries along the last axis
        are non-negative and sum to 1.
    """
    x = np.asarray(x)
    # A 0-d input has no axis to reduce along; fall back to a full reduction.
    axis = -1 if x.ndim else None
    # Subtracting the maximum leaves the result unchanged mathematically but
    # keeps np.exp from overflowing on large scores.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp_a = np.exp(shifted)
    return exp_a / np.sum(exp_a, axis=axis, keepdims=True)
def cross_entropy_loss(y, t):
    """Cross-entropy between outputs ``y`` and targets ``t``.

    A 1-D ``y`` is treated as a batch holding a single sample.  The summed
    loss is divided by the number of rows in ``y``.

    Args:
        y: Array of predicted values, 1-D or 2-D.
        t: Array of targets with the same number of elements as ``y``;
           it is multiplied element-wise with ``log(y)``.

    Returns:
        The loss averaged over the rows of ``y``.
    """
    if y.ndim == 1:
        # Promote a lone sample to a batch of one row.
        y = y.reshape(1, y.size)
        t = t.reshape(1, t.size)

    n_samples = y.shape[0]
    # The small constant keeps log() finite when an output is exactly 0.
    log_outputs = np.log(y + 1e-7)
    total = np.sum(t * log_outputs)
    return -total / n_samples
def numerical_gradient(f, x):
    """Estimate the gradient of ``f`` at ``x`` with central differences.

    For every element of ``x`` the value is nudged in place by ``+h`` and
    ``-h`` (``h = 1e-4``), ``f(x)`` is evaluated at both points, and the
    element is put back before the next one is visited:

        f'(x) ~= (f(x + h) - f(x - h)) / (2 * h)

    Args:
        f: Callable that receives ``x`` and returns a scalar.
        x: NumPy array; its elements are modified temporarily and restored
           after each pair of evaluations.

    Returns:
        Array created with ``np.zeros_like(x)`` holding one partial
        derivative per element of ``x``.
    """
    step = 1e-4
    grad = np.zeros_like(x)

    walker = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    for _ in walker:
        pos = walker.multi_index
        saved = x[pos]

        x[pos] = saved + step
        f_plus = f(x)   # f(x + h)

        x[pos] = saved - step
        f_minus = f(x)  # f(x - h)

        # Restore the element so later evaluations see the original value.
        x[pos] = saved
        grad[pos] = (f_plus - f_minus) / (2 * step)

    return grad
class NN:
    """Neural network with one hidden layer, trained by numerical gradients.

    Parameters live in ``self.param`` under the keys ``'W1'``, ``'b1'``,
    ``'W2'`` and ``'b2'``.  The forward pass is
    ``softmax(sigmoid(x @ W1 + b1) @ W2 + b2)``.
    """

    # Order in which train() visits the parameters.
    _PARAM_KEYS = ('W1', 'b1', 'W2', 'b2')

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Create the weights and biases.

        Args:
            input_size: Number of input features (rows of ``W1``).
            hidden_size: Number of hidden units.
            output_size: Number of outputs (columns of ``W2``).
            weight_init_std: Factor applied to the standard-normal samples
                used to initialise the weights.  Biases start at zero.
        """
        self.param = {}
        self.param['W1'] = np.random.randn(input_size, hidden_size) * weight_init_std
        self.param['b1'] = np.zeros(hidden_size)
        self.param['W2'] = np.random.randn(hidden_size, output_size) * weight_init_std
        self.param['b2'] = np.zeros(output_size)

    def predict(self, x):
        """Run the forward pass and return the softmax output for ``x``."""
        z = sigmoid(np.dot(x, self.param['W1']) + self.param['b1'])
        y = softmax(np.dot(z, self.param['W2']) + self.param['b2'])
        return y

    def loss(self, x, y_):
        """Return the cross-entropy loss of the prediction for ``x`` against ``y_``."""
        return cross_entropy_loss(self.predict(x), y_)

    def train(self, x, y_):
        """Perform one gradient-descent step on the batch ``(x, y_)``.

        Every parameter is moved *against* its gradient
        (``p -= learning_rate * dL/dp``), which lowers the loss.  Adding the
        gradient instead would climb the loss surface and the network would
        never learn.

        All gradients are evaluated at the current parameters before any
        parameter is changed, so each one refers to the same point.

        Args:
            x: Batch of inputs.
            y_: Targets for ``x``, in the format ``cross_entropy_loss`` expects.
        """
        def loss_f(_unused):
            # The argument is ignored on purpose: numerical_gradient perturbs
            # the parameter array in place, and self.loss reads that same
            # array through self.param.
            return self.loss(x, y_)

        grads = {
            key: numerical_gradient(loss_f, self.param[key])
            for key in self._PARAM_KEYS
        }
        for key in self._PARAM_KEYS:
            # In-place update keeps the array object stored in self.param.
            self.param[key] -= learning_rate * grads[key]

    def test(self, x, y_):
        """Return the fraction of rows whose argmax prediction matches ``y_``.

        Both the prediction and ``y_`` are reduced with ``argmax`` along
        axis 1, so ``y_`` is expected to be 2-D (one row per sample).
        """
        y = self.predict(x)
        return np.sum(np.argmax(y, axis=1) == np.argmax(y_, axis=1)) / x.shape[0]
def next_batch(x, t, batch_size):
    """Draw a random mini-batch from ``x`` and ``t``.

    ``batch_size`` row indices are sampled with ``np.random.choice`` (its
    default samples with replacement) and the same indices are applied to
    both arrays.

    Returns:
        Tuple ``(x_batch, t_batch)``.
    """
    picked = np.random.choice(x.shape[0], batch_size)
    return x[picked], t[picked]
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, flatten=True, one_hot_label=True)

# Shapes noted by the original author, in print order:
# (60000, 784), (60000, 10), (10000, 784), (10000, 10)
for dataset in (x_train, t_train, x_test, t_test):
    print(dataset.shape)

nn = NN(784, hidden_size, 10)

# Training is slow, so only 10 steps are run.
for step in range(10):
    print("training %i step" % step)
    x_batch, t_batch = next_batch(x_train, t_train, batch_size)
    nn.train(x_batch, t_batch)

# NOTE(review): indentation was lost in the source; the accuracy report is
# assumed to run once after the loop rather than on every step - confirm.
print(nn.test(x_test, t_test))
更多推荐
所有评论(0)