深度学习框架PyTorch:从入门到精通的8个实战项目
从简单的手写数字识别到复杂的Transformer,相信你已经对PyTorch有了全面的了解。记住,编程最重要的就是多练习。拿着这些代码,自己改改参数,加加功能,很快你就能成为PyTorch高手!学习路上遇到不懂的,随时来问我啊!
深度学习框架PyTorch:从入门到精通的8个实战项目
想学PyTorch但不知道从哪下手?别急,今天给大家准备了8个超棒的实战项目,带你从PyTorch小白变成深度学习高手!这些项目难度由浅入深,涵盖了图像分类、目标检测、自然语言处理等多个领域。跟着我一起动手做项目,保证你学得又快又好!
项目一:手写数字识别
咱们先从最经典的手写数字识别开始。这个项目用到的是MNIST数据集,里面有6万张训练图片和1万张测试图片,每张图片都是28x28像素的手写数字。
代码怎么写?看这里:
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
# 定义神经网络
class Net(nn.Module):
    """Fully connected classifier for 28x28 MNIST digits (10 classes)."""

    def __init__(self):
        super(Net, self).__init__()
        # Three dense layers: 784 -> 128 -> 64 -> 10.
        self.fc1 = nn.Linear(28 * 28, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        # Flatten everything except the batch dimension, then apply two
        # ReLU-activated hidden layers and a linear output layer.
        # Output is raw logits -- CrossEntropyLoss applies softmax itself.
        flat = torch.flatten(x, 1)
        hidden = torch.relu(self.fc1(flat))
        hidden = torch.relu(self.fc2(hidden))
        return self.fc3(hidden)
# 加载数据
# Load MNIST training data (downloads to ./data on first run).
train_dataset = datasets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)

# Train the model: Adam optimizer + cross-entropy loss, 5 epochs.
model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(5):
    running_loss = 0.0
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()   # clear gradients from the previous step
        output = model(data)
        loss = criterion(output, target)
        loss.backward()         # backpropagate
        optimizer.step()        # update weights
        running_loss += loss.item()
    # Report mean loss per epoch so training progress is visible.
    print(f'epoch {epoch + 1}: mean loss = {running_loss / len(train_loader):.4f}')
温馨提示:别忘了把数据集下载到 ./data 目录哦!

项目二:猫狗分类器
接下来,咱们来做个更实用的项目 —— 猫狗分类器。这次用到的是Kaggle上的Dogs vs. Cats数据集。不过这回咱们不自己搭网络了,直接用预训练的ResNet18,省事儿!
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
# 加载预训练模型
# Load an ImageNet-pretrained ResNet18 and retarget its head to 2 classes.
model = models.resnet18(pretrained=True)
model.fc = torch.nn.Linear(model.fc.in_features, 2)  # class 0 = cat, 1 = dog

# Standard ImageNet preprocessing: resize, centre-crop, normalise.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Read one image and add a batch dimension.
img = Image.open('cat.jpg')
img_tensor = transform(img).unsqueeze(0)

# Inference only: eval mode, no gradient tracking.
model.eval()
with torch.no_grad():
    output = model(img_tensor)
    _, predicted = torch.max(output, 1)
print('预测结果:', '猫' if predicted.item() == 0 else '狗')
项目三:情感分析
好了,咱们玩够图像了,来点自然语言处理的东西吧!这个项目我们要做情感分析,就是判断一句话是正面还是负面情绪。我们用LSTM(长短时记忆网络)来搞定这个任务。
import torch
import torch.nn as nn
class SentimentLSTM(nn.Module):
    """Binary sentiment classifier: embedding -> LSTM -> sigmoid score."""

    def __init__(self, vocab_size, embedding_dim, hidden_dim):
        super(SentimentLSTM, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        # x: token ids, presumably (batch, seq_len) given batch_first=True
        # -- confirm against the caller's data pipeline.
        vectors = self.embedding(x)
        # Only the final hidden state summarises the sequence; the
        # per-step outputs are discarded.
        _, (last_hidden, _) = self.lstm(vectors)
        score = self.fc(last_hidden.squeeze(0))
        # Squash to (0, 1): probability that the text is positive.
        return torch.sigmoid(score)
# Assumes a vocabulary and training data already exist.
# Hyperparameters: vocabulary size, embedding width, LSTM hidden width.
vocab_size = 10000
embedding_dim = 100
hidden_dim = 256
model = SentimentLSTM(vocab_size, embedding_dim, hidden_dim)
温馨提示:这个模型需要大量文本数据来训练,如果你手头没有数据,可以去找找IMDb电影评论数据集,那里有25000条带标签的评论。

项目四:图像风格迁移
现在咱们来点高端的!图像风格迁移,就是把一张图片的风格“搬”到另一张图片上。比如把你的自拍变成梵高的画风,酷不酷?这个项目用到了预训练的VGG19模型。
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
# 加载预训练的VGG19模型
vgg = models.vgg19(pretrained=True).features
# 定义内容损失和风格损失
class ContentLoss(nn.Module):
    """Transparent layer that records MSE against a fixed content target.

    forward() passes its input through unchanged so the module can be
    spliced into a feature extractor; the loss is stored in self.loss.
    """

    def __init__(self, target):
        super(ContentLoss, self).__init__()
        # Detach: the target is a constant, not part of the graph.
        self.target = target.detach()

    def forward(self, input):
        self.loss = nn.functional.mse_loss(input, self.target)
        return input
def gram_matrix(input):
    """Return the normalised Gram matrix of a conv feature map.

    The (b, c, h, w) map is flattened to (b*c, h*w) rows; the matrix of
    row dot-products captures channel correlations, i.e. image "style".
    """
    b, c, h, w = input.size()
    flat = input.view(b * c, h * w)
    gram = flat @ flat.t()
    # Normalise by the element count so the loss scale does not depend
    # on the feature-map size.
    return gram / (b * c * h * w)
class StyleLoss(nn.Module):
    """Transparent layer that records MSE between Gram matrices.

    Compares the Gram matrix of the current input with that of a fixed
    style target; like ContentLoss, the input flows through untouched.
    """

    def __init__(self, target_feature):
        super(StyleLoss, self).__init__()
        # Precompute the target's Gram matrix once, outside the graph.
        self.target = gram_matrix(target_feature).detach()

    def forward(self, input):
        current = gram_matrix(input)
        self.loss = nn.functional.mse_loss(current, self.target)
        return input
# 接下来就是把这些损失函数塞进VGG模型,然后开始训练...
这个项目有点复杂,我只给了核心代码。要完整实现还得加上图片加载、模型训练的代码。不过你要是能理解这些核心概念,剩下的就是体力活了!
项目五:生成对抗网络(GAN)
GAN可以说是近年来最火的深度学习模型之一了。它能生成假得你分不清真假的图片、视频,甚至音乐!我们来实现一个简单的GAN,用它来生成手写数字。
import torch
import torch.nn as nn
# 生成器
class Generator(nn.Module):
    """GAN generator: 100-d noise vector -> 28x28 image in [-1, 1]."""

    def __init__(self):
        super(Generator, self).__init__()
        # Widening MLP; Tanh keeps pixel values in [-1, 1] to match
        # normalised training images.
        self.model = nn.Sequential(
            nn.Linear(100, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, 784),
            nn.Tanh(),
        )

    def forward(self, z):
        flat_img = self.model(z)
        # Reshape the 784 outputs back into a 28x28 image per sample.
        return flat_img.view(-1, 28, 28)
# 判别器
class Discriminator(nn.Module):
    """GAN discriminator: 28x28 image -> probability that it is real."""

    def __init__(self):
        super(Discriminator, self).__init__()
        # Narrowing MLP; LeakyReLU avoids dead units early in training.
        self.model = nn.Sequential(
            nn.Linear(784, 512),
            nn.LeakyReLU(0.2),
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2),
            nn.Linear(256, 1),
            nn.Sigmoid(),
        )

    def forward(self, img):
        # Flatten (batch, 28, 28) images to (batch, 784) before scoring.
        return self.model(img.view(-1, 784))
# 训练过程比较复杂,这里就不写了
温馨提示:训练GAN是个技术活,需要仔细调参,不然容易出现模式崩溃(生成器只生成一种图像)或判别器太强导致生成器学不到东西。

项目六:神经网络机器翻译
下面我们来挑战下更难的任务 —— 机器翻译!我们用Seq2Seq模型来实现英语到法语的翻译。这个模型包含一个编码器和一个解码器,都是用LSTM实现的。
import torch
import torch.nn as nn
class Encoder(nn.Module):
    """Seq2Seq encoder: embeds a source sentence, runs it through an
    LSTM, and returns the final hidden/cell states as the context."""

    def __init__(self, input_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, emb_dim)
        # No batch_first: inputs are (seq_len, batch).
        self.rnn = nn.LSTM(emb_dim, hid_dim, n_layers, dropout=dropout)
        self.dropout = nn.Dropout(dropout)

    def forward(self, src):
        # src: (seq_len, batch) token ids.
        embedded = self.dropout(self.embedding(src))
        # The per-step output sequence is discarded; only the final
        # per-layer states are passed on to the decoder.
        _, (hidden, cell) = self.rnn(embedded)
        return hidden, cell
class Decoder(nn.Module):
    """Seq2Seq decoder: predicts the next target token from the previous
    token and the running LSTM state."""

    def __init__(self, output_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()
        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.rnn = nn.LSTM(emb_dim, hid_dim, n_layers, dropout=dropout)
        self.fc_out = nn.Linear(hid_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input, hidden, cell):
        # input: (batch,) previous token ids -> add a length-1 seq dim.
        step = input.unsqueeze(0)
        embedded = self.dropout(self.embedding(step))
        output, (hidden, cell) = self.rnn(embedded, (hidden, cell))
        # Drop the seq dim and project to vocabulary logits.
        prediction = self.fc_out(output.squeeze(0))
        return prediction, hidden, cell
# Seq2Seq模型把编码器和解码器组合在一起
class Seq2Seq(nn.Module):
    """Combines an Encoder and a Decoder into a full translation model.

    As published, forward() had no body (only a comment), which is a
    Python syntax error; this implements the standard teacher-forcing
    decoding loop while keeping the original signature.
    """

    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        # src: (src_len, batch); trg: (trg_len, batch) token ids.
        trg_len, batch_size = trg.shape
        trg_vocab_size = self.decoder.fc_out.out_features
        outputs = torch.zeros(trg_len, batch_size, trg_vocab_size, device=trg.device)
        # Encode the whole source sentence into the initial decoder state.
        hidden, cell = self.encoder(src)
        # First decoder input is the <sos> row trg[0]; outputs[0] stays zero.
        step_input = trg[0]
        for t in range(1, trg_len):
            prediction, hidden, cell = self.decoder(step_input, hidden, cell)
            outputs[t] = prediction
            # Teacher forcing: with the given probability feed the true
            # next token instead of the model's own best guess.
            use_truth = torch.rand(1).item() < teacher_forcing_ratio
            step_input = trg[t] if use_truth else prediction.argmax(1)
        return outputs
这个模型的训练过程比较复杂,涉及到teacher forcing(用真实的上一个输出作为下一个输入)等技巧。你可以去查查相关资料,或者直接上手试试看!
项目七:强化学习玩游戏
强化学习也是近年来很火的一个方向。我们来实现一个简单的DQN(Deep Q-Network)智能体,让它学会玩CartPole游戏。这个游戏的目标是通过左右移动小车来保持杆子直立。
import gym
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
# DQN网络
class DQN(nn.Module):
    """Q-network: maps an environment state to one Q-value per action."""

    def __init__(self, state_size, action_size):
        super(DQN, self).__init__()
        # Two small hidden layers are plenty for CartPole's 4-d state.
        self.fc1 = nn.Linear(state_size, 24)
        self.fc2 = nn.Linear(24, 24)
        self.fc3 = nn.Linear(24, action_size)

    def forward(self, x):
        h = torch.relu(self.fc1(x))
        h = torch.relu(self.fc2(h))
        # Raw Q-values: no activation on the output layer.
        return self.fc3(h)
# 创建环境和DQN
env = gym.make('CartPole-v1')
state_size = env.observation_space.shape[0]
action_size = env.action_space.n
dqn = DQN(state_size, action_size)
optimizer = optim.Adam(dqn.parameters())
criterion = nn.MSELoss()
# 训练循环
for episode in range(1000):
state = env.reset()
done = False
while not done:
action = dqn(torch.FloatTensor(state)).argmax().item()
next_state, reward, done, _ = env.step(action)
# 更新Q值...
state = next_state
if episode % 10 == 0:
print(f'Episode {episode}, Total reward: {total_reward}')
温馨提示:强化学习的训练过程可能会很不稳定,需要很多轮才能看到效果。耐心点,别灰心!
项目八:注意力机制与Transformer
最后,我们来实现深度学习界的“明星”模型 —— Transformer。这个模型在2017年被提出后,彻底改变了自然语言处理领域。我们来实现它的核心部分:多头注意力机制。
import torch
import torch.nn as nn
class MultiHeadAttention(nn.Module):
    """Multi-head attention ("Attention Is All You Need").

    Projects Q/K/V into num_heads subspaces of size d_k = d_model //
    num_heads, applies scaled dot-product attention per head, then
    concatenates the heads and projects back to d_model.
    """

    def __init__(self, d_model, num_heads):
        super(MultiHeadAttention, self).__init__()
        assert d_model % num_heads == 0, "d_model must be divisible by num_heads"
        self.num_heads = num_heads
        self.d_model = d_model
        self.d_k = d_model // num_heads
        # One full-width projection per role; heads are split by reshaping.
        self.W_q = nn.Linear(d_model, d_model)
        self.W_k = nn.Linear(d_model, d_model)
        self.W_v = nn.Linear(d_model, d_model)
        self.W_o = nn.Linear(d_model, d_model)

    def scaled_dot_product_attention(self, Q, K, V, mask=None):
        # Similarity scores, scaled by sqrt(d_k) to keep softmax gradients sane.
        scale = torch.sqrt(torch.tensor(self.d_k, dtype=torch.float32))
        scores = torch.matmul(Q, K.transpose(-2, -1)) / scale
        if mask is not None:
            # Positions where mask == 0 get -1e9 so softmax zeroes them out.
            scores = scores.masked_fill(mask == 0, -1e9)
        weights = torch.softmax(scores, dim=-1)
        return torch.matmul(weights, V)

    def forward(self, Q, K, V, mask=None):
        batch_size = Q.size(0)

        def split_heads(x):
            # (batch, seq, d_model) -> (batch, heads, seq, d_k)
            return x.view(batch_size, -1, self.num_heads, self.d_k).transpose(1, 2)

        attended = self.scaled_dot_product_attention(
            split_heads(self.W_q(Q)),
            split_heads(self.W_k(K)),
            split_heads(self.W_v(V)),
            mask,
        )
        # (batch, heads, seq, d_k) -> (batch, seq, d_model), then final projection.
        merged = attended.transpose(1, 2).contiguous().view(batch_size, -1, self.d_model)
        return self.W_o(merged)
这段代码实现了Transformer的核心 —— 多头注意力机制。理解这部分对于掌握现代NLP模型至关重要。
好了,咱们的8个项目就到这里啦!从简单的手写数字识别到复杂的Transformer,相信你已经对PyTorch有了全面的了解。记住,编程最重要的就是多练习。拿着这些代码,自己改改参数,加加功能,很快你就能成为PyTorch高手!学习路上遇到不懂的,随时来问我啊!
更多推荐
所有评论(0)