Alle Optimierungen abgeschlossen.

2022-02-21 15:29:29 +01:00
parent b156e620d0
commit d378e4291b
11 changed files with 113 additions and 108 deletions
--- a/chapters/Neural_Networks/Basics.tex
+++ b/chapters/Neural_Networks/Basics.tex
@@ -57,7 +57,6 @@ ergibt sich durch:
    \bm y = \nomeq{activation_function}(\bm W\bm x + \bm b)
 \end{equation}
 \begin{wrapfigure}{r}{.3\textwidth}
-    \vspace*{-8mm}
    \centering
    \includegraphics[width=0.8\linewidth]{feedforward_neural_network_composition.png}
    \caption{Feedforward Neural Network mit Funktionen}
@@ -92,15 +91,15 @@ ab welchem Schwellwert das Produkt aus Eingangswerten und Gewichten zu relevante
 In den meisten Fällen wird die \glsxtrshort{ReLU} \noms{activation_function} verwendet,
 wobei es sich auch lohnt, die Leaky \glsxtrshort{ReLU} oder \glsxtrshort{ELU} auszuprobieren.
 Die Sigmoid Funktion (\cref{ssub:Logistic sigmoid function}) sollte ausschließlich als \noms{activation_function} in Klassifikationsproblemen verwendet werden.\\
-\includegraphics[scale=.7]{sigmoid_activation_function.png}\\
+\includegraphics[scale=.6]{sigmoid_activation_function.png}\\
 \hrule{\textwidth,1mm}
-\includegraphics[scale=.7]{tanh_activation_function.png}\\
+\includegraphics[scale=.6]{tanh_activation_function.png}\\
 \hrule{\textwidth,1mm}
-\includegraphics[scale=.7]{ReLU_activation_function.png}\\
+\includegraphics[scale=.6]{ReLU_activation_function.png}\\
 \hrule{\textwidth,1mm}
-\includegraphics[scale=.7]{Leaky_ReLU_activation_function.png}\\
+\includegraphics[scale=.6]{Leaky_ReLU_activation_function.png}\\
 \hrule{\textwidth,1mm}
-\includegraphics[scale=.7]{exponential_linear_units_activation_function.png}\\
+\includegraphics[scale=.6]{exponential_linear_units_activation_function.png}\\


 \section{Optimization}%
--- a/chapters/Neural_Networks/Gradient_Descent.tex
+++ b/chapters/Neural_Networks/Gradient_Descent.tex
@@ -39,6 +39,7 @@ Hier berechnet sich der Loss durch
    \mathcal L &= \frac{1}{2}(y-t)^2
 \end{align}
 Für dieses Neural Network ist die Backpropagation dann
+
 \begin{alignat}{5} \label{eq:backward_pass}
    \frac{\partial \mathcal L}{\partial y} &= y - t && 
                                           &&=\overline{y}\\
@@ -163,17 +164,17 @@ bei denen die Lernrate abhängig von der Anzahl der Durchläufe des \nameref{cha
    \begin{tabularx}{\textwidth}{X|Y|Y}
        \bfseries\centering Verfahren & \bfseries Learning Rate & \bfseries Training Loss\\
        \hline
-        \textbf{Step:} Lernrate verändert sich nach einer bestimmten Anzahl von Algorithmus-Durchläufen & & \includegraphics[width=\linewidth,align=c]{learning_rate_decay_step.png}\\
+        \textbf{Step:} Lernrate verändert sich nach einer bestimmten Anzahl von Algorithmus-Durchläufen & & \includegraphics[width=.8\linewidth,align=c]{learning_rate_decay_step.png}\\
        \hline
        \textbf{Cosine:}$\alpha_t = \frac{1}{2}\alpha_0(1+\cos(\frac{t\pi}{T}))$ &
-        \includegraphics[width=\linewidth,align=c]{learning_rate_decay_cosine_learning_rate.png} &
-        \includegraphics[width=\linewidth,align=c]{learning_rate_decay_cosine_training_loss.png} \\
+        \includegraphics[width=.8\linewidth,align=c]{learning_rate_decay_cosine_learning_rate.png} &
+        \includegraphics[width=.8\linewidth,align=c]{learning_rate_decay_cosine_training_loss.png} \\
        \hline
        \textbf{Linear:}$\alpha_t = \alpha_0(1-\frac{t}{T})$ &
-        \includegraphics[width=\linewidth,align=c]{learning_rate_decay_linear_learning_rate.png} & \\
+        \includegraphics[width=.8\linewidth,align=c]{learning_rate_decay_linear_learning_rate.png} & \\
        \hline
        \textbf{Inverse sqrt:}$\alpha_t = \frac{\alpha_0}{\sqrt{t}}$ &
-        \includegraphics[width=\linewidth,align=c]{learning_rate_decay_inverse_sqrt.png} & \\
+        \includegraphics[width=.8\linewidth,align=c]{learning_rate_decay_inverse_sqrt.png} & \\
    \end{tabularx}
    ($\alpha_0$: initial learning rate, $\alpha_t$: learning rate at epoch $t$, $T$: total number of epochs) 
 \end{table}