I wrote the following equations in LaTeX:
\documentclass[12pt]{article}
\usepackage[utf8]{inputenc}
\usepackage{tikz}
\usepackage{hyperref}
\usepackage{url}
\usepackage{graphicx}
\usepackage{amsmath}
\usepackage{bbold}
\usepackage{fancyvrb}
\usepackage{movie15}
\usepackage{array}
\title{Getting started}
\author{DD}
\date{03/14/15}
\begin{document}
\maketitle
\noindent 1)Output layer:
$$\frac{\partial E(W)}{\partial W_{jk}^L} = \frac{\partial}{\partial W_{jk}^L} \frac{1}{2}\sum_{k} (O_k - t_k)^2 = (O_k - t_k)\frac{\partial}{\partial W_{jk}}O_k $$
$t_k$ is constant and $O_k = \sigma(x_k)$ by definition (the sigmoid activation function gives the output).
$$(O_k - t_k)\frac{\partial}{\partial W_{jk}} O_k = (O_k - t_k)\frac{\partial}{\partial W_{jk}} \sigma(x_k) = (O_k - t_k)\sigma(x_k)(1 - \sigma(x_k))\frac{\partial}{\partial W_{jk}} x_k$$
$$= (O_k - t_k)\sigma(x_k)(1 - \sigma(x_k)) O_j =
(O_k - t_k)O_k(1 - O_k)O_j$$
where $\frac{\partial}{\partial W_{jk}} x_k = O_j$, because $x_k = O_j * W_{jk}$ and the derivative of $W_{jk}$ with respect to itself is $1$.
Let $(O_k - t_k)O_k(1 - O_k) = \delta_k$. Then $$\frac{\partial E(W)}{\partial W_{jk}^L} = O_j\delta_k$$\newline
\noindent 2)Hidden layer:
$$\frac{\partial E(W)}{\partial W_{ij}^L} = \frac{\partial }{\partial W_{ij}^L} \frac{1}{2}\sum_{k \in K} (O_k - t_k)^2 = \sum_{k \in K}(O_k - t_k)\frac{\partial}{\partial W_{ij}} O_k=
\sum_{k \in K}(O_k - t_k)\frac{\partial}{\partial W_{ij}} \sigma(x_k)$$
$$= \sum_{k \in K}(O_k - t_k)\sigma(x_k)(1 - \sigma(x_k))\frac{\partial x_k}{\partial W_{ij}} =
\sum_{k \in K}(O_k - t_k)O_k(1 - O_k)\frac{\partial x_k}{\partial O_j}\frac{\partial O_j}{\partial W_{ij}} =
\frac{\partial O_j}{\partial W_{ij}} \sum_{k \in K}(O_k - t_k)O_k(1 - O_k) W_{jk}$$
where $W_{jk}$ is the weight connecting node $j$ to output node $k$; $\frac{\partial x_k}{\partial O_j} = W_{jk}$ because $x_k = O_j * W_{jk}$, and $O_j = \sigma(x_j)$.
$$\frac{\partial O_j}{\partial W_{ij}} \sum_{k \in K}(O_k - t_k)O_k(1 - O_k) W_{jk} = O_j(1 - O_j)\frac{\partial x_j}{\partial W_{ij}}\sum_{k \in K}(O_k - t_k)O_k(1 - O_k) W_{jk}$$
$$= O_j(1 - O_j)O_i\sum_{k \in K}(O_k - t_k)O_k(1 - O_k) W_{jk}$$
Recalling our definition of $\delta_k$, and setting $\delta_j = O_j(1 - O_j)\sum_{k \in K}\delta_k W_{jk}$, we can rewrite our final expression as:
$$\frac{\partial E(W)}{\partial W_{ij}^L} = O_j(1 - O_j)O_i \sum_{k \in K}\delta_k W_{jk} = O_i\delta_j$$
Then use the update rule $W_{t+1} = W_{t} - \alpha \frac{\partial E(W)}{\partial W}$ to update the weights.\newline
\end{document}
But the formatting is not great.
How can I make the equations look nicer?
Answer 1
Here is one possibility, using the amsmath environments (amsmath is loaded by mathtools), nccmath to get medium-size formulas (intermediate between textstyle and displaystyle), and the cool package, whose \pderiv command simplifies typing partial derivatives:
\documentclass[12pt]{article}
\usepackage[utf8]{inputenc}
\usepackage{geometry}
\usepackage{mathtools, nccmath, cool}
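% mathtools loads amsmath; nccmath provides \mfrac and \medmath for
% medium-size formulas; cool provides \pderiv{f}{x} for partial derivatives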
\usepackage{amssymb}
\usepackage{array}
\usepackage{hyperref}
\title{Getting started}
\author{DD}
\date{03/14/15}
\begin{document}
\maketitle
\noindent 1) Output layer:
\[ \pderiv{E(W)}{W_{jk}^L} =\pderiv{}{W_{jk}^L} \frac{1}{2}\!\sum_{k} (O_k - t_k)^2 = (O_k - t_k)\pderiv{}{ W_{jk}}O_k \]
$t_k$ is constant and $O_k = \sigma(x_k)$ by definition (the sigmoid activation function gives the output).
\begin{align*}
(O_k - t_k)\pderiv{}{W_{jk}} O_k & = (O_k - t_k)\pderiv{}{W_{jk}} \sigma(x_k) = (O_k - t_k)\sigma(x_k)(1 - \sigma(x_k))\pderiv{}{W_{jk}} x_k\\
& = (O_k - t_k)\sigma(x_k)(1 - \sigma(x_k)) O_j =
(O_k - t_k)O_k(1 - O_k)O_j,
\end{align*}
where $\medmath{\pderiv{}{W_{jk}}} x_k = O_j$, because $x_k = O_j * W_{jk}$ and the derivative of $W_{jk}$ with respect to itself is $1$.
Let $(O_k - t_k)O_k(1 - O_k) = \delta_k$. Then
\[ \pderiv{E(W)}{W_{jk}^L} = O_j\delta_k. \]
\noindent 2) Hidden layer:
\begin{align*}
\pderiv{E(W)}{{W_{ij}^L}}
& = \pderiv{}{W_{ij}^L}%
\mfrac{1}{2}\sum_{k \in K} (O_k - t_k)^2
= \sum_{k \in K}(O_k - t_k)\pderiv{}{W_{ij}}O_k=
%
\sum_{k \in K}(O_k - t_k)\pderiv{}{W_{ij}} \sigma(x_k) \\
& = \sum_{k \in K}(O_k - t_k)\sigma(x_k)(1 - \sigma(x_k))\pderiv{x_k}{W_{ij}} =
\sum_{k \in K}(O_k - t_k)O_k(1 - O_k)\pderiv{x_k}{O_j}\pderiv{O_j}{W_{ij}}\\
& = \pderiv{O_j}{W_{ij}} \sum_{k \in K}(O_k - t_k)O_k(1 - O_k) W_{jk},
\end{align*}
{where $W_{jk}$ is the weight connecting node $j$ to output node $k$; $\medmath{\pderiv{x_k}{O_j}}= W_{jk}$ because $x_k = O_j * W_{jk}$, and $O_j = \sigma(x_j)$.}
\begin{flalign*}
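% \mathrlap (from mathtools) gives the long sum zero width so it overhangs to
% the right, and \\[-1.5ex] tightens the spacing; the following lines then
% align on the = sign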
& \pderiv{O_j}{W_{ij}} \mathrlap{\sum_{k \in K}(O_k - t_k)O_k(1 - O_k) W_{jk}}\\[-1.5ex]
& & & = O_j(1 - O_j)\pderiv{x_j}{W_{ij}}\sum_{k \in K}(O_k - t_k)O_k(1 - O_k) W_{jk} & & \\
& & & = O_j(1 - O_j)O_i\sum_{k \in K}(O_k - t_k)O_k(1 - O_k) W_{jk} \\
\intertext[1ex]{Recalling our definition of $\delta_k$, and setting $\delta_j = O_j(1 - O_j)\sum_{k \in K}\delta_k W_{jk}$, we can rewrite our final expression as:}
& & \pderiv{E(W)}{W_{ij}^L} & = O_j(1 - O_j)O_i \sum_{k \in K}\delta_k W_{jk} = O_i\delta_j.
\end{flalign*}
Then use the update rule $W_{t+1} = W_{t} - \alpha \medmath{\pderiv{E(W)}{W}}$ to update the weights.\newline
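For example, with the output-layer gradient derived above and learning rate $\alpha$, a single output-layer weight is updated as
\[ W_{jk}^{L} \leftarrow W_{jk}^{L} - \alpha\, O_j\delta_k. \]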
\end{document}
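If the cool package is not installed, its \pderiv command can be replaced by a hand-rolled macro covering only the two-argument form used above (a minimal sketch; cool's actual command supports more notation):

% minimal stand-in for cool's \pderiv{f}{x}; drop cool from the \usepackage line
\newcommand{\pderiv}[2]{\frac{\partial #1}{\partial #2}}

Note also that the plain-TeX $$ ... $$ displays from the question have been replaced by \[ ... \] and the align*/flalign* environments, which is the recommended way to typeset displayed math in LaTeX.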