Kernel Regression abgeschlossen

2022-02-13 13:11:20 +01:00 · 2022-02-13 13:11:20 +01:00 · 92ff396b36
commit 92ff396b36
parent ecf712de93
6 changed files with 79 additions and 27 deletions
--- a/Glossary.tex
+++ b/Glossary.tex
@ -84,15 +84,16 @@
 %--------------------

 %add new key
-\glsaddstoragekey{unit}{}{\glsentryunit}
+%\glsaddstoragekey{unit}{}{\glsentryunit}
 \glsnoexpandfields

-\newcommand{\newnom}[5]{
+%\newcommand{\newnom}[5]{
+\newcommand{\newnom}[4]{
    \newglossaryentry{#1}{
        name={#2},
        symbol={#3},
        description={#4},
-        unit={#5},
+        %unit={#5},
        type=nomenclature,
        sort={#1}
    }
@ -108,23 +109,23 @@
 % Use a nomenclature entry inside an equation:
 % typesets the symbol stored for glossary entry #1 (\glsentrysymbol)
 % and wraps it in \glslink so that it links to that entry.
 \newcommand{\nomeq}[1]{\glslink{#1}{\glsentrysymbol{#1}}}

-\newnom{summed_squared_error}{\gls{SSE}}{\text{\glsxtrshort{SSE}}}{\glsxtrfull{SSE}}{}
-\newnom{mean_squared_error}{\gls{MSE}}{\text{\glsxtrshort{MSE}}}{\glsxtrfull{MSE}}{}
-\newnom{residual_sum_squares}{\gls{RSS}}{\text{\glsxtrshort{RSS}}}{\glsxtrfull{RSS}}{}
-\newnom{gaussian_noise}{Gausches Rauschen}{\epsilon}{zufällige (normalverteilte) Abweichung}{}
-\newnom{vector_valued_function}{vektorwertige Funktion}{\phi(\bm{x})}{vektorwertige Funktion der des Eingangsvektor $\bm{x}$}{}
-\newnom{regularization_factor}{Regularisierungsfaktor}{\lambda}{}{}
-\newnom{identity_matrix}{Identitätsmatrix}{\bm{I}}{$\begin{bmatrix} 1 & 0 & \cdots & 0 \\ 0 & 1 & \cdots & 0 \\ \vdots & \vdots & \ddots & \vdots\\ 0 & 0 & \cdots & 1 \end{bmatrix}$}{}
-\newnom{probability_mass_function}{Probability Mass Function}{p(x)}{Wahrscheinlichkeitsdichte-\slash\,Wahrscheinlichkeitsmassefunktion}{}
-\newnom{mean}{arithmetisches Mittel}{\mu}{}{}
-\newnom{mean-vector}{Mittelwerts-Vektor}{\bm{\mu}}{}{}
-\newnom{covariance}{Kovarianz-Matrix}{\bm{\Sigma}}{}{}
-\newnom{variance}{Varianz}{\sigma^2}{$\mathbb{E}_p[(X-\nomeq{mean})$]}{}
-\newnom{sigmoid}{Sigmoid Function}{\sigma}{}{}
-\newnom{learning_rate}{Learning Rate}{\eta}{}{}
-\newnom{kernel_matrix}{Kernel Matrix}{\bm{K}}{}{}
-\newnom{kernel_function}{Kernel Function}{k}{}{}
-\newnom{kernel_vector}{Kernel Vector}{\bm{k}}{}{}
+\newnom{summed_squared_error}{\gls{SSE}}{\text{\glsxtrshort{SSE}}}{\glsxtrfull{SSE}}
+\newnom{mean_squared_error}{\gls{MSE}}{\text{\glsxtrshort{MSE}}}{\glsxtrfull{MSE}}
+\newnom{residual_sum_squares}{\gls{RSS}}{\text{\glsxtrshort{RSS}}}{\glsxtrfull{RSS}}
+\newnom{gaussian_noise}{Gaußsches Rauschen}{\epsilon}{zufällige (normalverteilte) Abweichung}
+\newnom{vector_valued_function}{vektorwertige Funktion}{\bm\phi(\bm{x})}{vektorwertige Funktion des Eingangsvektors $\bm{x}$}
+\newnom{regularization_factor}{Regularisierungsfaktor}{\lambda}{}
+\newnom{identity_matrix}{Identitätsmatrix}{\bm{I}}{$\begin{bmatrix} 1 & 0 & \cdots & 0 \\ 0 & 1 & \cdots & 0 \\ \vdots & \vdots & \ddots & \vdots\\ 0 & 0 & \cdots & 1 \end{bmatrix}$}
+\newnom{probability_mass_function}{Probability Mass Function}{p(x)}{Wahrscheinlichkeitsdichte-\slash\,Wahrscheinlichkeitsmassefunktion}
+\newnom{mean}{arithmetisches Mittel}{\mu}{}
+\newnom{mean-vector}{Mittelwerts-Vektor}{\bm{\mu}}{}
+\newnom{covariance}{Kovarianz-Matrix}{\bm{\Sigma}}{}
+\newnom{variance}{Varianz}{\sigma^2}{$\mathbb{E}_p[(X-\nomeq{mean})^2]$}
+\newnom{sigmoid}{Sigmoid Function}{\sigma}{}
+\newnom{learning_rate}{Learning Rate}{\eta}{}
+\newnom{kernel_matrix}{Kernel Matrix}{\bm{K}}{}
+\newnom{kernel_function}{Kernel Function}{k}{}
+\newnom{kernel_vector}{Kernel Vector}{\bm{k}}{}
 \shorthandoff{"}

 \makeglossaries
--- a/Preface.tex
+++ b/Preface.tex
@ -25,17 +25,21 @@
    \renewcommand{\glsgroupskip}{}%avoids grouping the elements by alphabetical order
    \renewenvironment{theglossary}{% Change the table type --> 4 columns
        \renewcommand*{\arraystretch}{1.5}
-    \begin{longtable}{>{\centering}p{.1\textwidth} >{\arraybackslash}p{.225\textwidth} p{.475\textwidth}>{\centering\arraybackslash}p{.1\textwidth}}}%
+    %\begin{longtable}{>{\centering}p{.1\textwidth} >{\arraybackslash}p{.225\textwidth} p{.475\textwidth}>{\centering\arraybackslash}p{.1\textwidth}}}%
+    \begin{longtable}{>{\centering}p{.1\textwidth} >{\arraybackslash}p{.225\textwidth} p{.575\textwidth}}}%
    {\end{longtable}}%
    %
    \renewcommand*{\glossaryheader}{%  Change the table header
-        \bfseries\large Symbol & \bfseries\large Bezeichnung & \large\bfseries Beschreibung & \large\bfseries Einheit\\
+        %\bfseries\large Symbol & \bfseries\large Bezeichnung & \large\bfseries Beschreibung & \large\bfseries Einheit\\
+        \bfseries\large Symbol & \bfseries\large Bezeichnung & \large\bfseries Beschreibung\\
    \hline\endhead}%
    \renewcommand*{\glossentry}[2]{%  Change the displayed items
-        \boldmath\ensuremath{\glossentrysymbol{##1}}
+        %\boldmath\ensuremath{\glossentrysymbol{##1}}
+        \ensuremath{\glossentrysymbol{##1}}
                               & \glstarget{##1}{\hspace*{0pt}\glossentryname{##1}} %
                               & \glossentrydesc{##1}
-                               & \glsentryunit{##1}\tabularnewline
+                               %& \glsentryunit{##1}
+                               \tabularnewline
                           }%
                       }
 \printglossary[type=nomenclature, nonumberlist, style=symbunitlong]
--- a/chapters/Classical_Supervised_Learning/Trees_and_Forests.tex
+++ b/chapters/Classical_Supervised_Learning/Trees_and_Forests.tex
@ -81,7 +81,7 @@ welchen Anteil die Klasse $k$ auf der linken Seite des Splits hat.
 \subsubsection{Classification Tree}%
 \label{ssub:Classification Tree}
 \includegraphics[width=.6\textwidth]{classification_tree.png}
-{\color{red} Herleitung Vorlesung 03 Seite 24-31}
+{\color{red} Herleitung Vorlesung 04 Seite 24-31}

 \subsubsection{Regression Tree}%
 \label{ssub:Regression Tree}
@ -96,7 +96,7 @@ Predict (log) prostate specific antigen from
 \end{itemize}
 }
 \vspace*{30mm}
-{\color{red} Herleitung Vorlesung 03 Seite 32-36}
+{\color{red} Herleitung Vorlesung 04 Seite 32-36}

 \section{Random Forests}%
 \label{sec:Random Forests}
--- a/chapters/Kernel_Methods/Kernel-Regression.tex
+++ b/chapters/Kernel_Methods/Kernel-Regression.tex
@ -1,5 +1,52 @@
 \chapter{Kernel-Regression}%
 \label{cha:Kernel-Regression}
+Die Kernel Regression ist das Äquivalent der Linear \nameref{sub:Ridge Regression} (\cref{sub:Ridge Regression}),
+weshalb sie auch oft als Kernel Ridge Regression bezeichnet wird.
+Die Linear Ridge Regression ist allerdings für den linearen Feature Space gedacht
+und lässt sich nicht direkt in einem Feature Space mit unendlicher Dimension anwenden.

-WEITER AUF FOLIE 294
+Mithilfe eines mathematischen Tricks (aus dem Matrix Cookbook) lässt sich die Lösung der Ridge Regression so umstellen,
+dass statt einer $d\times d$ Matrix lediglich eine $N\times N$ Matrix invertiert werden muss:
+\begin{equation}
+    \bm w^* = \underbrace{(\bm\Phi^T\bm\Phi + \nomeq{regularization_factor}\nomeq{identity_matrix})^{-1}}_{\text{$d\times d$ matrix inversion}}\bm\Phi^T\bm y
+    = \bm\Phi^T\underbrace{(\bm\Phi\bm\Phi^T + \nomeq{regularization_factor}\nomeq{identity_matrix})^{-1}}_{\text{$N\times N$ matrix inversion}}\bm y 
+\end{equation}
+Nun erlaubt es die Verwendung einer \nomf{kernel_matrix} (\cref{cha:Kernel Basics}),
+die Gleichung weiter zu vereinfachen:
+\begin{equation}
+    \bm w^* = \bm\Phi^T\underbrace{(\bm\Phi\bm\Phi^T + \nomeq{regularization_factor}\nomeq{identity_matrix})^{-1}}_{\text{$N\times N$ matrix inversion}}\bm y 
+    = \bm\Phi^T \underbrace{(\nomeq{kernel_matrix} + \nomeq{regularization_factor}\nomeq{identity_matrix})^{-1}\bm y}_{\bm \alpha}
+    = \bm\Phi^T \bm\alpha
+\end{equation}
+Allerdings besteht weiterhin das Problem,
+dass $\bm w^* \in \mathbb{R}^d$ eine potentiell unendlich große Dimension hat
+und daher nicht dargestellt oder abgespeichert werden kann.
+Die Beschreibung mithilfe des Kernels ermöglicht es uns jedoch,
+eine Funktion $f(\bm x)$,
+die $\bm w^*$ verwendet, auszuwerten:
+\begin{equation}
+    f(\bm x) = \nomeq{vector_valued_function}^T\bm w^*
+    = \nomeq{vector_valued_function}^T\bm\Phi^T\bm\alpha
+    = \nomeq{kernel_vector}(\bm x)^T\bm\alpha
+    = \sum_i \alpha_i \nomeq{kernel_function}(\bm x_i,\bm x)
+\end{equation}
+Die Lösung der Kernel Ridge Regression wird daher gegeben durch:
+\begin{equation} \label{eq:kernel_ridge_regression_solution}
+    f^*(\bm x) = \nomeq{kernel_vector}(\bm x)^T (\nomeq{kernel_matrix} + \nomeq{regularization_factor}\nomeq{identity_matrix})^{-1}\bm y
+\end{equation}

+\section{Selecting the hyper-parameters}%
+\label{sub:Selecting the hyper-parameters}
+Die Auswahl der passenden Hyperparameter (z.B. \nomsym{variance} für den \nameref{sub:Gaussian Kernel}) ist ein Model Selection Problem (\cref{cha:Model Selection}).
+\begin{figure}[H]
+    \centering
+    \includegraphics[width=0.5\textwidth]{gaussian_kernel_model_selection.png}
+    \caption{\nameref{cha:Model Selection} Problem für einen \nameref{sub:Gaussian Kernel}}
+    \label{fig:gaussian_kernel_model_selection}
+\end{figure}
+
+\section{Examples and comparison to \glsxtrshort{RBF} regression}%
+\label{sec:Examples and comparison to RBF regression}
+\begin{center}
+    \includegraphics[width=.9\textwidth]{kernel_regression_comparison.pdf}
+\end{center}
--- a/images/gaussian_kernel_model_selection.png
+++ b/images/gaussian_kernel_model_selection.png
--- a/images/kernel_regression_comparison.pdf
+++ b/images/kernel_regression_comparison.pdf