PyTorch: automatically building a deep neural network (DNN) with an arbitrary number of layers
Automatically constructing a deep neural network from variable parameters
Hand-writing neural-network code is something everyone runs into. When designing your own network, you have to decide on the network size, the number of hidden layers, the activation functions and the parameter-initialization method. The clumsiest approach is to hard-code all of this and edit the code by hand every time something changes. This post presents a way to generate the network automatically: whenever the design changes, you only adjust a few parameters and the network rebuilds itself, which greatly speeds up writing production code (see the short sketch after the reference list, and the full implementation below).
References:
1. Four ways to build a neural network with PyTorch
2. PyTorch – 1. Building a simple neural network with PyTorch
3. Master the workflow of building a neural network with PyTorch in ten minutes
4. Building a neural network with PyTorch
5. PyTorch tutorial – building neural networks with PyTorch (part 2)
6. Default parameter initialization in PyTorch
7. Four commonly used optimizers in PyTorch: SGD, Momentum, RMSProp and Adam
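The whole post boils down to one pattern: describe the architecture with a list or tuple of layer widths and let a loop create the layers. Here is a minimal sketch of that pattern (the helper name build_mlp and the sizes are only illustrative; the full, more flexible implementation follows below):

import torch.nn as nn

def build_mlp(indim, outdim, hidden_units, act=nn.Tanh()):
    # One Linear layer per entry of hidden_units, plus an output layer:
    # changing the tuple changes the whole architecture automatically.
    layers = []
    widths = [indim] + list(hidden_units)
    for w_in, w_out in zip(widths[:-1], widths[1:]):
        layers.append(nn.Linear(w_in, w_out))
        layers.append(act)
    layers.append(nn.Linear(widths[-1], outdim))
    return nn.Sequential(*layers)

net = build_mlp(indim=2, outdim=1, hidden_units=(10, 20, 10))

The classes below follow the same idea, but add per-layer weight initialization, selectable activations and optional skip connections.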
# -*- coding: utf-8 -*-
"""
Created on 2021.06.18
@author: LXA
"""
import torch
import torch.nn as tn
import torch.nn.functional as tnf
from torch.nn.parameter import Parameter
import numpy as np
import matplotlib.pyplot as plt
class my_actFunc(tn.Module):
    """Wraps a named activation function so it can be selected by a string argument."""
    def __init__(self, actName='linear'):
        super(my_actFunc, self).__init__()
        self.actName = actName

    def forward(self, x_input):
        if str.lower(self.actName) == 'relu':
            out_x = tnf.relu(x_input)
        elif str.lower(self.actName) == 'leaky_relu':
            out_x = tnf.leaky_relu(x_input)
        elif str.lower(self.actName) == 'tanh':
            out_x = torch.tanh(x_input)
        elif str.lower(self.actName) == 'srelu':
            out_x = tnf.relu(x_input) * tnf.relu(1 - x_input)
        elif str.lower(self.actName) == 'elu':
            out_x = tnf.elu(x_input)
        elif str.lower(self.actName) == 'sin':
            out_x = torch.sin(x_input)
        elif str.lower(self.actName) == 'sigmoid':
            out_x = torch.sigmoid(x_input)
        else:  # 'linear' or any unrecognized name: identity
            out_x = x_input
        return out_x
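As a quick illustration, the module can be picked by name and applied like any other nn.Module (a minimal usage sketch assuming the class above; the input values are arbitrary):

act = my_actFunc(actName='srelu')      # select the activation by its string name
x = torch.linspace(-1.0, 2.0, steps=5)
print(act(x))                          # srelu(x) = relu(x) * relu(1 - x)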
# ----------------dense net (constructing the NN and initializing weights and biases)------------
class Pure_DenseNet(tn.Module):
    """
    Args:
        indim: the dimension of the input data
        outdim: the dimension of the output
        hidden_units: the number of units in each hidden layer, a list or a tuple
        name2Model: the type of DNN to use: DNN, ScaleDNN or FourierDNN
        actName2in: the name of the activation function for the input layer
        actName: the name of the activation function for the hidden layers
        actName2out: the name of the activation function for the output layer
        scope2W: the namespace of the weights
        scope2B: the namespace of the biases
        type2float: the numerical (floating-point) type
        to_gpu: whether to use the GPU
        gpu_no: the index of the GPU to use when to_gpu is True
    """
    def __init__(self, indim=1, outdim=1, hidden_units=None, name2Model='DNN', actName2in='tanh', actName='tanh',
                 actName2out='linear', scope2W='Weight', scope2B='Bias', type2float='float32', to_gpu=False, gpu_no=0):
        super(Pure_DenseNet, self).__init__()
        self.indim = indim
        self.outdim = outdim
        self.hidden_units = hidden_units
        self.name2Model = name2Model
        self.actFunc_in = my_actFunc(actName=actName2in)
        self.actFunc = my_actFunc(actName=actName)
        self.actFunc_out = my_actFunc(actName=actName2out)
        self.dense_layers = tn.ModuleList()

        # input layer: indim --> hidden_units[0]
        input_layer = tn.Linear(in_features=indim, out_features=hidden_units[0])
        tn.init.xavier_normal_(input_layer.weight)
        tn.init.uniform_(input_layer.bias, -1, 1)
        self.dense_layers.append(input_layer)

        # hidden layers: hidden_units[i] --> hidden_units[i+1]
        for i_layer in range(len(hidden_units) - 1):
            hidden_layer = tn.Linear(in_features=hidden_units[i_layer], out_features=hidden_units[i_layer + 1])
            tn.init.xavier_normal_(hidden_layer.weight)
            tn.init.uniform_(hidden_layer.bias, -1, 1)
            self.dense_layers.append(hidden_layer)

        # output layer: hidden_units[-1] --> outdim
        out_layer = tn.Linear(in_features=hidden_units[-1], out_features=outdim)
        tn.init.xavier_normal_(out_layer.weight)
        tn.init.uniform_(out_layer.bias, -1, 1)
        self.dense_layers.append(out_layer)

    def get_regular_sum2WB(self, regular_model='L2'):
        regular_w = 0
        regular_b = 0
        if regular_model == 'L1':
            for layer in self.dense_layers:
                regular_w = regular_w + torch.sum(torch.abs(layer.weight))
                regular_b = regular_b + torch.sum(torch.abs(layer.bias))
        elif regular_model == 'L2':
            for layer in self.dense_layers:
                regular_w = regular_w + torch.sum(torch.mul(layer.weight, layer.weight))
                regular_b = regular_b + torch.sum(torch.mul(layer.bias, layer.bias))
        return regular_w + regular_b

    def forward(self, inputs, scale=None, training=None, mask=None):
        # ------ dealing with the input data ---------------
        dense_in = self.dense_layers[0]
        H = dense_in(inputs)
        H = self.actFunc_in(H)

        # ---resnet (one-step skip connection between two consecutive layers if they have the same number of neurons)---
        hidden_record = self.hidden_units[0]
        for i_layer in range(0, len(self.hidden_units) - 1):
            H_pre = H
            dense_layer = self.dense_layers[i_layer + 1]
            H = dense_layer(H)
            H = self.actFunc(H)
            if self.hidden_units[i_layer + 1] == hidden_record:
                H = H + H_pre
            hidden_record = self.hidden_units[i_layer + 1]

        dense_out = self.dense_layers[-1]
        H = dense_out(H)
        H = self.actFunc_out(H)
        return H
class DNN_test(tn.Module):
    def __init__(self, dim_in=2, dim_out=1, hidden_layers=None, name2Model='DNN', actName_in='tanh',
                 actName_hidden='tanh', actName_out='linear', use_gpu=False, no2gpu=0):
        super(DNN_test, self).__init__()
        self.name2Model = name2Model
        self.dim_in = dim_in
        self.dim_out = dim_out
        if name2Model == 'DNN':
            self.DNN = Pure_DenseNet(indim=dim_in, outdim=dim_out,
                                     hidden_units=hidden_layers, name2Model=name2Model,
                                     actName2in=actName_in, actName=actName_hidden,
                                     actName2out=actName_out, to_gpu=use_gpu,
                                     gpu_no=no2gpu)

    def forward(self, x_input, freq=None):
        out = self.DNN(x_input, scale=freq)
        return out

    def get_sum2wB(self):
        sum2WB = self.DNN.get_regular_sum2WB()
        return sum2WB

    def cal_l2loss(self, x_input=None, freq=None, y_input=None):
        out = self.DNN(x_input, scale=freq)
        square_loss = torch.mul(y_input - out, y_input - out)
        loss = torch.mean(square_loss, dim=0)
        return loss, out
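The training routine test_DNN below computes the regularization sum sum2wb but never adds it to the loss. If the penalty is meant to take effect, the loss computation inside the training loop could be extended along these lines (a sketch only; penalty2WB is a hypothetical hyperparameter that does not appear in the original code):

# Inside the training loop of test_DNN (sketch): add the L2 penalty to the data-fitting loss.
penalty2WB = 1e-4                                    # hypothetical regularization strength
loss_mse, prediction = model.cal_l2loss(x_input=torch_x, freq=freq, y_input=torch_y)
loss = loss_mse + penalty2WB * model.get_sum2wB()    # total loss = MSE + L2(weights, biases)
loss.backward()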
def test_DNN():
    batch_size = 10
    dim_in = 2
    dim_out = 1
    hidden_list = (10, 20, 10, 10, 20)
    freq = np.array([1, 2, 3, 4], dtype=np.float32)
    model_name = 'DNN'
    init_lr = 0.01
    max_it = 10000
    with_gpu = True
    model = DNN_test(dim_in=dim_in, dim_out=dim_out, hidden_layers=hidden_list, name2Model=model_name,
                     actName_in='tanh', actName_hidden='tanh', use_gpu=with_gpu, no2gpu=0)
    if with_gpu:
        model = model.cuda(device='cuda:' + str(0))

    params2Net = model.DNN.parameters()

    # Define the optimizer and set the initial learning rate
    # optimizer = torch.optim.SGD(params2Net, lr=init_lr)                    # SGD
    # optimizer = torch.optim.SGD(params2Net, lr=init_lr, momentum=0.8)      # Momentum
    # optimizer = torch.optim.RMSprop(params2Net, lr=init_lr, alpha=0.95)    # RMSProp
    optimizer = torch.optim.Adam(params2Net, lr=init_lr)                     # Adam

    # Define how the learning rate is updated (scheduler)
    # scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)
    # scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda epoch: 1 / (epoch + 1))
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 10, gamma=0.995)

    arr2epoch = []
    arr2loss = []
    arr2lr = []
    for i_epoch in range(max_it):
        # Training data: random points in [0, 1)^2 with target y = sin(x1^2 + x2^2)
        x = np.random.rand(batch_size, dim_in)
        x = x.astype(dtype=np.float32)
        torch_x = torch.from_numpy(x)
        y = np.reshape(np.sin(x[:, 0] * x[:, 0] + x[:, 1] * x[:, 1]), (-1, 1))
        torch_y = torch.from_numpy(y)
        if with_gpu:
            torch_x = torch_x.cuda(device='cuda:' + str(0))
            torch_y = torch_y.cuda(device='cuda:' + str(0))

        loss, prediction = model.cal_l2loss(x_input=torch_x, freq=freq, y_input=torch_y)
        sum2wb = model.get_sum2wB()

        optimizer.zero_grad()  # Clear old gradients; this only needs to happen before the next backward pass
        loss.backward()        # Back-propagate to compute the gradients
        optimizer.step()       # Update the parameters
        scheduler.step()       # Update the learning rate

        if i_epoch % 100 == 0:
            print('i_epoch --- loss:', i_epoch, loss.item())
            # print("learning rate at epoch %d: %f" % (i_epoch, optimizer.param_groups[0]['lr']))
            arr2loss.append(loss.item())
            arr2lr.append(optimizer.param_groups[0]['lr'])

    plt.figure()
    ax = plt.gca()
    plt.plot(arr2loss, 'b-.', label='loss')
    plt.xlabel('epoch/100', fontsize=14)
    plt.ylabel('loss', fontsize=14)
    plt.legend(fontsize=18)
    ax.set_yscale('log')
    plt.show()

    # plt.cla()
    # plt.plot(x[:, 0], x[:, 1], y, 'b*')
    # plt.show()


if __name__ == "__main__":
    test_DNN()