forked from TH_General/Template_Summary
Kernel Regression abgeschlossen
This commit is contained in:
parent
ecf712de93
commit
92ff396b36
41
Glossary.tex
41
Glossary.tex
@ -84,15 +84,16 @@
|
|||||||
%--------------------
|
%--------------------
|
||||||
|
|
||||||
%add new key
|
%add new key
|
||||||
\glsaddstoragekey{unit}{}{\glsentryunit}
|
%\glsaddstoragekey{unit}{}{\glsentryunit}
|
||||||
\glsnoexpandfields
|
\glsnoexpandfields
|
||||||
|
|
||||||
\newcommand{\newnom}[5]{
|
%\newcommand{\newnom}[5]{
|
||||||
|
\newcommand{\newnom}[4]{
|
||||||
\newglossaryentry{#1}{
|
\newglossaryentry{#1}{
|
||||||
name={#2},
|
name={#2},
|
||||||
symbol={#3},
|
symbol={#3},
|
||||||
description={#4},
|
description={#4},
|
||||||
unit={#5},
|
%unit={#5},
|
||||||
type=nomenclature,
|
type=nomenclature,
|
||||||
sort={#1}
|
sort={#1}
|
||||||
}
|
}
|
||||||
@ -108,23 +109,23 @@
|
|||||||
%use nomenclature entry (use in equation)
|
%use nomenclature entry (use in equation)
|
||||||
\newcommand{\nomeq}[1]{\glslink{#1}{\glsentrysymbol{#1}}}
|
\newcommand{\nomeq}[1]{\glslink{#1}{\glsentrysymbol{#1}}}
|
||||||
|
|
||||||
\newnom{summed_squared_error}{\gls{SSE}}{\text{\glsxtrshort{SSE}}}{\glsxtrfull{SSE}}{}
|
\newnom{summed_squared_error}{\gls{SSE}}{\text{\glsxtrshort{SSE}}}{\glsxtrfull{SSE}}
|
||||||
\newnom{mean_squared_error}{\gls{MSE}}{\text{\glsxtrshort{MSE}}}{\glsxtrfull{MSE}}{}
|
\newnom{mean_squared_error}{\gls{MSE}}{\text{\glsxtrshort{MSE}}}{\glsxtrfull{MSE}}
|
||||||
\newnom{residual_sum_squares}{\gls{RSS}}{\text{\glsxtrshort{RSS}}}{\glsxtrfull{RSS}}{}
|
\newnom{residual_sum_squares}{\gls{RSS}}{\text{\glsxtrshort{RSS}}}{\glsxtrfull{RSS}}
|
||||||
\newnom{gaussian_noise}{Gausches Rauschen}{\epsilon}{zufällige (normalverteilte) Abweichung}{}
|
\newnom{gaussian_noise}{Gaußsches Rauschen}{\epsilon}{zufällige (normalverteilte) Abweichung}
|
||||||
\newnom{vector_valued_function}{vektorwertige Funktion}{\phi(\bm{x})}{vektorwertige Funktion der des Eingangsvektor $\bm{x}$}{}
|
\newnom{vector_valued_function}{vektorwertige Funktion}{\bm\phi(\bm{x})}{vektorwertige Funktion des Eingangsvektors $\bm{x}$}
|
||||||
\newnom{regularization_factor}{Regularisierungsfaktor}{\lambda}{}{}
|
\newnom{regularization_factor}{Regularisierungsfaktor}{\lambda}{}
|
||||||
\newnom{identity_matrix}{Identitätsmatrix}{\bm{I}}{$\begin{bmatrix} 1 & 0 & \cdots & 0 \\ 0 & 1 & \cdots & 0 \\ \vdots & \vdots & \ddots & \vdots\\ 0 & 0 & \cdots & 1 \end{bmatrix}$}{}
|
\newnom{identity_matrix}{Identitätsmatrix}{\bm{I}}{$\begin{bmatrix} 1 & 0 & \cdots & 0 \\ 0 & 1 & \cdots & 0 \\ \vdots & \vdots & \ddots & \vdots\\ 0 & 0 & \cdots & 1 \end{bmatrix}$}
|
||||||
\newnom{probability_mass_function}{Probability Mass Function}{p(x)}{Wahrscheinlichkeitsdichte-\slash\,Wahrscheinlichkeitsmassefunktion}{}
|
\newnom{probability_mass_function}{Probability Mass Function}{p(x)}{Wahrscheinlichkeitsdichte-\slash\,Wahrscheinlichkeitsmassefunktion}
|
||||||
\newnom{mean}{arithmetisches Mittel}{\mu}{}{}
|
\newnom{mean}{arithmetisches Mittel}{\mu}{}
|
||||||
\newnom{mean-vector}{Mittelwerts-Vektor}{\bm{\mu}}{}{}
|
\newnom{mean-vector}{Mittelwerts-Vektor}{\bm{\mu}}{}
|
||||||
\newnom{covariance}{Kovarianz-Matrix}{\bm{\Sigma}}{}{}
|
\newnom{covariance}{Kovarianz-Matrix}{\bm{\Sigma}}{}
|
||||||
\newnom{variance}{Varianz}{\sigma^2}{$\mathbb{E}_p[(X-\nomeq{mean})$]}{}
|
\newnom{variance}{Varianz}{\sigma^2}{$\mathbb{E}_p[(X-\nomeq{mean})^2]$}
|
||||||
\newnom{sigmoid}{Sigmoid Function}{\sigma}{}{}
|
\newnom{sigmoid}{Sigmoid Function}{\sigma}{}
|
||||||
\newnom{learning_rate}{Learning Rate}{\eta}{}{}
|
\newnom{learning_rate}{Learning Rate}{\eta}{}
|
||||||
\newnom{kernel_matrix}{Kernel Matrix}{\bm{K}}{}{}
|
\newnom{kernel_matrix}{Kernel Matrix}{\bm{K}}{}
|
||||||
\newnom{kernel_function}{Kernel Function}{k}{}{}
|
\newnom{kernel_function}{Kernel Function}{k}{}
|
||||||
\newnom{kernel_vector}{Kernel Vector}{\bm{k}}{}{}
|
\newnom{kernel_vector}{Kernel Vector}{\bm{k}}{}
|
||||||
\shorthandoff{"}
|
\shorthandoff{"}
|
||||||
|
|
||||||
\makeglossaries
|
\makeglossaries
|
||||||
|
12
Preface.tex
12
Preface.tex
@ -25,17 +25,21 @@
|
|||||||
\renewcommand{\glsgroupskip}{}%avoids grouping the elements by alphabetical order
|
\renewcommand{\glsgroupskip}{}%avoids grouping the elements by alphabetical order
|
||||||
\renewenvironment{theglossary}{% Change the table type --> 4 columns
|
\renewenvironment{theglossary}{% Change the table type --> 4 columns
|
||||||
\renewcommand*{\arraystretch}{1.5}
|
\renewcommand*{\arraystretch}{1.5}
|
||||||
\begin{longtable}{>{\centering}p{.1\textwidth} >{\arraybackslash}p{.225\textwidth} p{.475\textwidth}>{\centering\arraybackslash}p{.1\textwidth}}}%
|
%\begin{longtable}{>{\centering}p{.1\textwidth} >{\arraybackslash}p{.225\textwidth} p{.475\textwidth}>{\centering\arraybackslash}p{.1\textwidth}}}%
|
||||||
|
\begin{longtable}{>{\centering}p{.1\textwidth} >{\arraybackslash}p{.225\textwidth} p{.575\textwidth}}}%
|
||||||
{\end{longtable}}%
|
{\end{longtable}}%
|
||||||
%
|
%
|
||||||
\renewcommand*{\glossaryheader}{% Change the table header
|
\renewcommand*{\glossaryheader}{% Change the table header
|
||||||
\bfseries\large Symbol & \bfseries\large Bezeichnung & \large\bfseries Beschreibung & \large\bfseries Einheit\\
|
%\bfseries\large Symbol & \bfseries\large Bezeichnung & \large\bfseries Beschreibung & \large\bfseries Einheit\\
|
||||||
|
\bfseries\large Symbol & \bfseries\large Bezeichnung & \large\bfseries Beschreibung\\
|
||||||
\hline\endhead}%
|
\hline\endhead}%
|
||||||
\renewcommand*{\glossentry}[2]{% Change the displayed items
|
\renewcommand*{\glossentry}[2]{% Change the displayed items
|
||||||
\boldmath\ensuremath{\glossentrysymbol{##1}}
|
%\boldmath\ensuremath{\glossentrysymbol{##1}}
|
||||||
|
\ensuremath{\glossentrysymbol{##1}}
|
||||||
& \glstarget{##1}{\hspace*{0pt}\glossentryname{##1}} %
|
& \glstarget{##1}{\hspace*{0pt}\glossentryname{##1}} %
|
||||||
& \glossentrydesc{##1}
|
& \glossentrydesc{##1}
|
||||||
& \glsentryunit{##1}\tabularnewline
|
%& \glsentryunit{##1}
|
||||||
|
\tabularnewline
|
||||||
}%
|
}%
|
||||||
}
|
}
|
||||||
\printglossary[type=nomenclature, nonumberlist, style=symbunitlong]
|
\printglossary[type=nomenclature, nonumberlist, style=symbunitlong]
|
||||||
|
@ -81,7 +81,7 @@ welchen Anteil die Klasse $k$ auf der linken Seite des Splits hat.
|
|||||||
\subsubsection{Classification Tree}%
|
\subsubsection{Classification Tree}%
|
||||||
\label{ssub:Classification Tree}
|
\label{ssub:Classification Tree}
|
||||||
\includegraphics[width=.6\textwidth]{classification_tree.png}
|
\includegraphics[width=.6\textwidth]{classification_tree.png}
|
||||||
{\color{red} Herleitung Vorlesung 03 Seite 24-31}
|
{\color{red} Herleitung Vorlesung 04 Seite 24-31}
|
||||||
|
|
||||||
\subsubsection{Regression Tree}%
|
\subsubsection{Regression Tree}%
|
||||||
\label{ssub:Regression Tree}
|
\label{ssub:Regression Tree}
|
||||||
@ -96,7 +96,7 @@ Predict (log) prostate specific antigen from
|
|||||||
\end{itemize}
|
\end{itemize}
|
||||||
}
|
}
|
||||||
\vspace*{30mm}
|
\vspace*{30mm}
|
||||||
{\color{red} Herleitung Vorlesung 03 Seite 32-36}
|
{\color{red} Herleitung Vorlesung 04 Seite 32-36}
|
||||||
|
|
||||||
\section{Random Forests}%
|
\section{Random Forests}%
|
||||||
\label{sec:Random Forests}
|
\label{sec:Random Forests}
|
||||||
|
@ -1,5 +1,52 @@
|
|||||||
\chapter{Kernel-Regression}%
|
\chapter{Kernel-Regression}%
|
||||||
\label{cha:Kernel-Regression}
|
\label{cha:Kernel-Regression}
|
||||||
|
Die Kernel Regression ist das Äquivalent der Linear \nameref{sub:Ridge Regression} (\cref{sub:Ridge Regression}),
|
||||||
|
weshalb es auch oft als Kernel Ridge Regression bezeichnet wird.
|
||||||
|
Die Linear Ridge Regression ist allerdings für den linearen Feature Space gedacht
|
||||||
|
und lässt sich nicht direkt in einem Feature Space mit unendlicher Dimension anwenden.
|
||||||
|
|
||||||
WEITER AUF FOLIE 294
|
Mithilfe eines mathematischen Tricks (aus dem Matrix Cookbook) lässt sich die Lösung der Ridge Regression so umstellen,
|
||||||
|
dass statt einer $d\times d$ Matrix lediglich eine $N\times N$ Matrix invertiert werden muss:
|
||||||
|
\begin{equation}
|
||||||
|
\bm w^* = \underbrace{(\bm\Phi^T\bm\Phi + \nomeq{regularization_factor}\nomeq{identity_matrix})^{-1}}_{\text{$d\times d$ matrix inversion}}\bm\Phi^T\bm y
|
||||||
|
= \bm\Phi^T\underbrace{(\bm\Phi\bm\Phi^T + \nomeq{regularization_factor}\nomeq{identity_matrix})^{-1}}_{\text{$N\times N$ matrix inversion}}\bm y
|
||||||
|
\end{equation}
|
||||||
|
Nun erlaubt es die Verwendung einer \nomf{kernel_matrix} (\cref{cha:Kernel Basics}),
|
||||||
|
die Gleichung weiter zu vereinfachen:
|
||||||
|
\begin{equation}
|
||||||
|
\bm w^* = \bm\Phi^T\underbrace{(\bm\Phi\bm\Phi^T + \nomeq{regularization_factor}\nomeq{identity_matrix})^{-1}}_{\text{$N\times N$ matrix inversion}}\bm y
|
||||||
|
= \bm\Phi^T \underbrace{(\nomeq{kernel_matrix} + \nomeq{regularization_factor}\nomeq{identity_matrix})^{-1}\bm y}_{\bm \alpha}
|
||||||
|
= \bm\Phi^T \bm\alpha
|
||||||
|
\end{equation}
|
||||||
|
Allerdings besteht weiterhin das Problem,
|
||||||
|
dass $\bm w^* \in \mathbb{R}^d$ eine potentiell unendlich große Dimension hat
|
||||||
|
und daher nicht dargestellt oder abgespeichert werden kann.
|
||||||
|
Allerdings ermöglicht es uns die Beschreibung mithilfe des Kernels,
|
||||||
|
eine Funktion $f(\bm x)$,
|
||||||
|
die $\bm w^*$ verwendet, auszuwerten:
|
||||||
|
\begin{equation}
|
||||||
|
f(\bm x) = \nomeq{vector_valued_function}^T\bm w^*
|
||||||
|
= \nomeq{vector_valued_function}^T\bm\Phi^T\bm\alpha
|
||||||
|
= \nomeq{kernel_vector}(\bm x)^T\bm\alpha
|
||||||
|
= \sum_i \alpha_i \nomeq{kernel_function}(\bm x_i,\bm x)
|
||||||
|
\end{equation}
|
||||||
|
Die Lösung der Kernel Ridge Regression wird daher gegeben durch:
|
||||||
|
\begin{equation} \label{eq:kernel_ridge_regression_solution}
|
||||||
|
f^*(\bm x) = \nomeq{kernel_vector}(\bm x)^T (\nomeq{kernel_matrix} + \nomeq{regularization_factor}\nomeq{identity_matrix})^{-1}\bm y
|
||||||
|
\end{equation}
|
||||||
|
|
||||||
|
\section{Selecting the hyper-parameters}%
|
||||||
|
\label{sub:Selecting the hyper-parameters}
|
||||||
|
Die Auswahl der passenden Hyperparameter (z.B. \nomsym{variance} für den \nameref{sub:Gaussian Kernel}) ist ein Model Selection Problem (\cref{cha:Model Selection}).
|
||||||
|
\begin{figure}[H]
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=0.5\textwidth]{gaussian_kernel_model_selection.png}
|
||||||
|
\caption{\nameref{cha:Model Selection} Problem für einen \nameref{sub:Gaussian Kernel}}
|
||||||
|
\label{fig:gaussian_kernel_model_selection}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\section{Examples and comparison to \glsxtrshort{RBF} regression}%
|
||||||
|
\label{sec:Examples and comparison to RBF regression}
|
||||||
|
\begin{center}
|
||||||
|
\includegraphics[width=.9\textwidth]{kernel_regression_comparison.pdf}
|
||||||
|
\end{center}
|
||||||
|
BIN
images/gaussian_kernel_model_selection.png
Normal file
BIN
images/gaussian_kernel_model_selection.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 13 KiB |
BIN
images/kernel_regression_comparison.pdf
Normal file
BIN
images/kernel_regression_comparison.pdf
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user