% Regularized cost and gradient of a 3-layer (input / hidden / output)
% neural network, computed with forward propagation and backpropagation.
%
% Reads from the enclosing scope:
%   X          - training inputs, one example per row (m rows)
%   y          - class labels taking values in 1..num_labels
%   m          - number of training examples
%   Theta1     - hidden-layer weights, size(X,2)+1 columns (bias first)
%   Theta2     - output-layer weights, size(Theta1,1)+1 columns (bias first);
%                expected to have num_labels rows
%   num_labels - number of output classes
%   lambda     - regularization strength
% Writes:
%   J                        - scalar regularized cross-entropy cost
%   Theta1_grad, Theta2_grad - gradients, same sizes as Theta1 / Theta2
%   grad                     - both gradients unrolled into one column vector

% ---- 1. Forward propagation ----
X1 = [ones(m, 1), X];            % prepend the bias column to the inputs
a2 = sigmoid(X1 * Theta1');      % hidden-layer activations, m x hidden_units
a2 = [ones(m, 1), a2];           % prepend the bias unit to the hidden layer
a3 = sigmoid(a2 * Theta2');      % output-layer activations, m x num_labels

% One-hot encode the labels once: Y(r, k) == 1 iff y(r) == k.
% y(:) forces a column so a row-vector y cannot silently broadcast to m x m.
Y = double(bsxfun(@eq, y(:), 1:num_labels));

% ---- 2. Cost ----
% Cross-entropy summed over every example and every class.
J1 = sum(sum(Y .* log(a3) + (1 - Y) .* log(1 - a3)));

% Copies of the weights with the bias column zeroed: bias terms are not
% regularized. These are reused below for the gradient regularization term.
temp1 = Theta1;
temp1(:, 1) = 0;
temp2 = Theta2;
temp2(:, 1) = 0;

J = -1 / m * J1 + lambda / (2 * m) * (sum(temp1(:) .^ 2) + sum(temp2(:) .^ 2));

% ---- 3. Backpropagation ----
% Output-layer error: prediction minus one-hot target, m x num_labels.
err3 = a3 - Y;

% Propagate the error back through Theta2, drop the bias row, and scale by
% the sigmoid derivative a2 .* (1 - a2) of the hidden units.
err2 = Theta2' * err3';                                   % (hidden_units+1) x m
err2 = err2(2:end, :) .* (a2(:, 2:end)' .* (1 - a2(:, 2:end))');  % hidden_units x m

% Accumulate the per-example outer products for all examples at once:
% sum_i err2(:,i) * X1(i,:) == err2 * X1, and likewise for the output layer.
delta_1 = err2 * X1;             % same size as Theta1
delta_2 = err3' * a2;            % same size as Theta2

% ---- 4. Regularized gradients ----
Theta1_grad = 1 / m * delta_1 + lambda / m * temp1;
Theta2_grad = 1 / m * delta_2 + lambda / m * temp2;

% Unroll both gradient matrices into a single column vector.
grad = [Theta1_grad(:) ; Theta2_grad(:)];