我正在尝试用 PyTorch 编写我的第一个神经网络。
不幸的是,在计算损失时遇到了问题。
出现以下错误信息:
RuntimeError: Function 'LogSoftmaxBackward0' returned nan values in its 0th output.
于是我尝试调试,发现了一些奇怪的东西。
输入中没有 nan 和 inf,我通过以下代码进行验证:
# isnan alone misses +/-inf; flag any non-finite entry (NaN or inf) instead.
print(torch.any(~torch.isfinite(inputs)))
但如果我在模型的每一步之后都打印 x,就会发现在某个时刻出现了 inf。
training
# One optimisation step on a single batch.
# presumably `data` is an (inputs, labels) pair from the data loader -- confirm.
inputs, labels = data
# isnan alone misses +/-inf, so report any non-finite entry (NaN or inf).
print(torch.any(~torch.isfinite(inputs)))
# Clear gradients left over from the previous step before the new backward pass.
optimizer.zero_grad()
outputs = model(inputs)
print(outputs)
loss = criterion(outputs, labels)
print(f"epoch: {epoch + 1} loss: {loss.item()}")
loss.backward()
optimizer.step()
model
class Net(Module):
    """Small 1-D convolutional network with float64 parameters.

    Pipeline: Conv1d(1 -> 5, kernel 5, stride 2) -> ReLU ->
    Conv1d(5 -> 1, kernel 2) -> MaxPool1d(2) applied seven times ->
    Linear(1350 -> 16).  The input length must therefore leave exactly
    1350 samples after the final pooling step.

    The result of every stage is printed, which is useful for tracing
    where non-finite values first appear.
    """

    def __init__(self):
        super().__init__()
        float64 = torch.float64
        self.layer1 = Conv1d(in_channels=1, out_channels=5, kernel_size=5, stride=2, dtype=float64)
        self.act1 = ReLU()
        self.pool1 = MaxPool1d(2)
        self.layer2 = Conv1d(in_channels=5, out_channels=1, kernel_size=2, dtype=float64)
        self.fcl1 = Linear(1350, 16, dtype=float64)

    def forward(self, x):
        """Run the network on ``x``, printing each intermediate tensor.

        NOTE(review): the posted run shows inf appearing right after
        ``layer1`` for inputs of extreme magnitude; presumably the inputs
        need scaling/validation upstream -- confirm against the data loader.
        """
        print("raw", x)
        out = self.layer1(x)
        print("conv1d 1", out)
        out = self.act1(out)
        print("relu", out)
        out = self.layer2(out)
        print("conv1d 2", out)
        # The same stateless pooling layer is applied seven times in a row,
        # halving the length each time (a 128x reduction overall).
        for _ in range(7):
            out = self.pool1(out)
        print("pools", out)
        out = self.fcl1(out)
        print("linear", out)
        return out
output
tensor(False)
raw tensor([[9.0616e+227, 2.4353e-152, 1.0294e-71, ..., 0.0000e+00,
0.0000e+00, 0.0000e+00]], dtype=torch.float64)
conv1d 1 tensor([[ -inf, -inf, -inf, ..., -0.2516, -0.2516, -0.2516],
[ inf, inf, inf, ..., 0.3377, 0.3377, 0.3377],
[ -inf, -inf, -inf, ..., 0.4285, 0.4285, 0.4285],
[ -inf, -inf, -inf, ..., -0.1230, -0.1230, -0.1230],
[ inf, inf, inf, ..., 0.3793, 0.3793, 0.3793]],
dtype=torch.float64, grad_fn=<SqueezeBackward1>)
relu tensor([[0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
[ inf, inf, inf, ..., 0.3377, 0.3377, 0.3377],
[0.0000, 0.0000, 0.0000, ..., 0.4285, 0.4285, 0.4285],
[0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
[ inf, inf, inf, ..., 0.3793, 0.3793, 0.3793]],
dtype=torch.float64, grad_fn=<ReluBackward0>)
conv1d 2 tensor([[ -inf, -inf, -inf, ..., -5.4167e+265,
-5.4167e+265, -5.4167e+265]], dtype=torch.float64,
grad_fn=<SqueezeBackward1>)
pools tensor([[ -inf, -5.4167e+265, -5.4167e+265, ..., -5.4167e+265,
-5.4167e+265, -5.4167e+265]], dtype=torch.float64,
grad_fn=<SqueezeBackward1>)
linear tensor([[inf, inf, -inf, -inf, -inf, inf, inf, inf, inf, inf, inf, -inf, inf, inf, -inf, -inf]],
dtype=torch.float64, grad_fn=<AddmmBackward0>)
tensor([[inf, inf, -inf, -inf, -inf, inf, inf, inf, inf, inf, inf, -inf, inf, inf, -inf, -inf]],
dtype=torch.float64, grad_fn=<AddmmBackward0>)
epoch: 1 loss: nan
感谢您的帮助