$\begin{cases} \displaystyle \frac{\partial J}{\partial z_j^L}=\sum_k\frac{\partial J}{\partial a_k^L}\frac{\partial a_k^L}{\partial z_j^L} \\ \displaystyle \frac{\partial J}{\partial a_k^L} = \frac{\partial \left(\frac{1}{2}\sum_{i = 1}^{M}(y_i - t_i)^2\right)}{\partial y_k} = y_k - t_k, \quad y_k = a_k^L \\ \displaystyle \frac{\partial a_k^L}{\partial z_j^L} = \frac{\partial\, \text{softmax}(z^L)_k}{\partial z_j^L} = y_k(\mathbb{1}_{[k = j]} - y_j) \\ \displaystyle \delta^L_j = \frac{\partial J}{\partial z_j^L} = y_j [(y_j - t_j) + \sum_{i = 1}^{M}(t_i - y_i)y_i] \end{cases}$

$\begin{cases} \displaystyle \delta_j^l = \frac{\partial J}{ \partial z_j^l} = \sum_k \frac{\partial J}{\partial z_k^{l + 1}} \frac{\partial z_k^{l + 1}}{\partial z_j ^l} = \sum_k \frac{\partial z_k^{l + 1}}{\partial z_j ^l}\delta_k^{l + 1} \\ \displaystyle \frac{\partial z_k^{l + 1}}{\partial z_j^l} = w_{kj}^{l + 1}\sigma'(z_j^l) \\ \displaystyle \delta _j^l = \sigma'(z_j^l)\sum_k w_{kj}^{l+1}\delta_k^{l + 1}, \quad \sigma = \text{sigmoid} \end{cases}$

0%