Getting Familiar with PyTorch

import torch

x = torch.randn(2, 3)
print(x)
x = ((x * 2) / (x - 1))
print(x)
print(x.requires_grad)

# requires_grad now lives on the Tensor itself (the old Variable wrapper is gone)
x.requires_grad_(True)
print(x.requires_grad)

y = (x * x).sum()

y.backward()
# y = sum(x^2)
# dy/dx = 2 * x
print(x.grad)
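A minimal sketch that checks the comment above numerically: for y = sum(x**2), autograd should return exactly dy/dx = 2 * x.

import torch

# verify that backward() of y = sum(x**2) yields dy/dx = 2 * x
x = torch.randn(2, 3, requires_grad=True)
y = (x * x).sum()
y.backward()
print(torch.allclose(x.grad, 2 * x.detach()))  # expected: True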
a = torch.randn(2, 3)
print(a)
b = ((a * 2) / (a - 1))
print(b)
print("a grad is ", a.requires_grad)
print("b grad is ", b.requires_grad)
a.requires_grad_(True)
b.requires_grad_(True)
print("a grad is ", a.requires_grad)
print("b grad is ", b.requires_grad)

c = (b * b).sum()

c.backward()
# b = 2a / (a - 1)
# db/da = -2 / (a - 1)^2
# c = sum(b^2)
# dc/db = 2 * b
# a.grad stays None: b was computed *before* a.requires_grad_(True),
# so the graph behind c never reaches back to a.
print("a_grad = ", a.grad)
print("b_grad = ", b.grad)
tensor([[ 1.3270, -2.5966, -0.1547],
        [ 0.3614,  1.0595,  0.8459]])
tensor([[  8.1156,   1.4439,   0.2679],
        [ -1.1320,  35.5890, -10.9797]])
a grad is  False
b grad is  False
a grad is  True
b grad is  True
a_grad =  None
b_grad =  tensor([[ 16.2312,   2.8879,   0.5359],
        [ -2.2639,  71.1781, -21.9595]])
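If requires_grad is enabled on a before b is computed, the graph does reach a and a.grad is filled in. A minimal sketch checking the chain rule dc/da = 2b * (-2 / (a - 1)^2); note that here b is an intermediate (non-leaf) tensor, so PyTorch does not populate b.grad.

import torch

a = torch.randn(2, 3, requires_grad=True)
b = (a * 2) / (a - 1)
c = (b * b).sum()
c.backward()

# chain rule: dc/da = dc/db * db/da = 2b * (-2 / (a - 1)**2)
expected = 2 * b.detach() * (-2 / (a.detach() - 1) ** 2)
print(torch.allclose(a.grad, expected))  # expected: True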
a = torch.ones(2, 3) + 1
print(a)
b = ((a * 2) / (a - 1))
print(b)
print("a grad is ", a.requires_grad)
a.requires_grad_(True)
print("a grad is ", a.requires_grad)

b.backward()

print("a_grad = ", a.grad)
tensor([[ 2.,  2.,  2.],
        [ 2.,  2.,  2.]])
tensor([[ 4.,  4.,  4.],
        [ 4.,  4.,  4.]])
a grad is  False
a grad is  True



---------------------------------------------------------------------------

RuntimeError                              Traceback (most recent call last)

<ipython-input-31-b43c2a9b1b1c> in <module>()
     13 
     14 # c.backward()
---> 15 b.backward()
     16 # b = a*2/(a-1)
     17 #db/da = 2ln(a-1)

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
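Two things go wrong above: b was computed before a.requires_grad_(True), so b has no grad_fn, and b is not a scalar, so backward() also needs an explicit gradient argument. A minimal fixed sketch:

import torch

a = torch.ones(2, 3) + 1
a.requires_grad_(True)        # enable grad *before* building the graph
b = (a * 2) / (a - 1)

b.backward(torch.ones_like(b))  # equivalent to b.sum().backward()
print("a_grad = ", a.grad)       # db/da = -2 / (a - 1)**2 = -2 everywhere here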
x = torch.randn(3, requires_grad=True)

y = x * 2

# y.data.norm() is the norm of y: raise every element to the p-th power,
# sum, then take the p-th root; the default is p = 2
while y.data.norm() < 1000:
    y = y * 2

print(y)
gradients = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float)

# backward() multiplies the Jacobian by this vector (a vector-Jacobian product)
y.backward(gradients)
print(x.grad)

print(x.requires_grad)
print((x ** 2).requires_grad)
with torch.no_grad():
    print((x ** 2).requires_grad)

print((x ** 2).requires_grad)
tensor([-571.8924,  188.2365, -904.6218])
tensor([  204.8000,  2048.0000,     0.2048])
True
True
False
True
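A small sketch of what the gradient argument means: y.backward(v) produces the same x.grad as backpropagating the scalar (y * v).sum(), i.e. a vector-Jacobian product.

import torch

x = torch.randn(3, requires_grad=True)
v = torch.tensor([0.1, 1.0, 0.0001])

y = x * 2
y.backward(v)                 # vector-Jacobian product
grad_vjp = x.grad.clone()

x.grad.zero_()
y2 = x * 2                    # rebuild the graph (it was freed by backward)
(y2 * v).sum().backward()     # equivalent scalar formulation
print(torch.allclose(grad_vjp, x.grad))  # expected: True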
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    # define a small convolutional network
    def __init__(self):
        super(Net, self).__init__()

        # 1 input image channel, 6 output channels, 5x5 convolution kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        # 6 input channels, 16 output channels
        self.conv2 = nn.Conv2d(6, 16, 5)

        # affine layers: y = Wx + b
        # 16 * 5 * 5 = 400: a 32x32 input becomes 16 feature maps of 5x5
        # after the two conv + pool stages (32 -> 28 -> 14 -> 10 -> 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # max pooling over a 2x2 window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))

        # if the pooling window is square, a single number is enough
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)

        x = x.view(-1, self.num_flat_features(x))  # flatten the 16 feature maps of 5x5 into one vector of 400
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension, e.g. 3x4x5x6 gives 4*5*6 = 120
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

net = Net()
print(net)
Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)
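A quick shape check, continuing with the net defined above, to see where in_features=400 in fc1 comes from:

import torch
import torch.nn.functional as F

# conv1 (5x5): 32 -> 28, pool /2: 28 -> 14
# conv2 (5x5): 14 -> 10, pool /2: 10 -> 5, i.e. 16 * 5 * 5 = 400 features
x = torch.randn(1, 1, 32, 32)
x = F.max_pool2d(F.relu(net.conv1(x)), 2)
print(x.shape)  # torch.Size([1, 6, 14, 14])
x = F.max_pool2d(F.relu(net.conv2(x)), 2)
print(x.shape)  # torch.Size([1, 16, 5, 5])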
# take a look at the parameters
params = list(net.parameters())
print(len(params))
print(params[2].size())
10
torch.Size([16, 6, 5, 5])
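Listing the parameter names next to their shapes (continuing from above) makes the count of 10 obvious: each of the 5 layers contributes a weight and a bias, and params[2] is conv2.weight.

for name, p in net.named_parameters():
    print(name, tuple(p.size()))
# conv1.weight (6, 1, 5, 5)
# conv1.bias   (6,)
# conv2.weight (16, 6, 5, 5)   <- this is params[2]
# ...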
# feed the network a random 32x32 input
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)
tensor([[-0.0400,  0.0145, -0.0109, -0.0255,  0.0603,  0.0657, -0.0557,
         -0.0260,  0.0959,  0.1870]])
net.zero_grad()
out.backward(torch.randn(1,10))
output = net(input)
target = torch.arange(1, 11, dtype=torch.float)  # MSELoss needs a float target
target = target.view(1, -1)  # reshape to the same shape as output
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)
tensor(37.9689)
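nn.MSELoss with the default reduction is just the mean of the squared differences; a tiny standalone sketch mirroring the loss above:

import torch
import torch.nn as nn

pred = torch.randn(1, 10)
tgt = torch.arange(1, 11, dtype=torch.float).view(1, -1)
print(torch.allclose(nn.MSELoss()(pred, tgt), ((pred - tgt) ** 2).mean()))  # expected: True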
print(loss.grad_fn)  # MSELoss
print(loss.grad_fn.next_functions[0][0])  # Addmm (the last Linear layer)
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # Expand (broadcast of that layer's bias)
<MseLossBackward object at 0x00000275FF44FDA0>
<AddmmBackward object at 0x00000275FF44FBA8>
<ExpandBackward object at 0x00000275FF44FDA0>
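Instead of chaining next_functions by hand, a small helper (hypothetical, not part of PyTorch) can walk a few levels of the autograd graph behind loss, continuing from the cell above:

def print_graph(fn, depth=0, max_depth=3):
    # recursively print the grad_fn nodes reachable from fn
    if fn is None or depth > max_depth:
        return
    print("  " * depth + type(fn).__name__)
    for next_fn, _ in fn.next_functions:
        print_graph(next_fn, depth + 1, max_depth)

print_graph(loss.grad_fn)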
net.zero_grad()
print(net.conv1.bias.grad)
loss.backward()
print(net.conv1.bias.grad)
tensor([ 0.,  0.,  0.,  0.,  0.,  0.])
tensor([-0.1013,  0.0209, -0.0838, -0.0878, -0.0151,  0.0398])
# manual SGD update: weight = weight - learning_rate * gradient
learning_rate = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)
# update the weights with the built-in optimizer instead
import torch.optim as optim

# create an optimizer over the network's parameters
optimizer = optim.SGD(net.parameters(), lr=0.01)

# one training step
optimizer.zero_grad()
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step()
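A toy sketch of repeating that step a few times on the same random input/target from above; a real training loop would iterate over a DataLoader instead.

for step in range(10):
    optimizer.zero_grad()            # clear gradients from the previous step
    output = net(input)              # forward pass
    loss = criterion(output, target)
    loss.backward()                  # backpropagate
    optimizer.step()                 # update the weights
    print(step, loss.item())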