forked from TH_General/Template_Summary
k-NN hinzugefügt.
This commit is contained in:
parent
36ea379b0e
commit
e94e73bed1
@ -73,6 +73,7 @@
|
||||
\newacronym{iid}{iid}{\gls{identically_independently_distributed}}
|
||||
\newacronym{SDG}{SGD}{Stochastic Gradient Descent}
|
||||
\newacronym{LLO}{LOO}{Leave-One-Out}
|
||||
\newacronym{knn}{k"=NN}{k"=Nearest Neighbors}
|
||||
|
||||
%--------------------
|
||||
%nomenclature
|
||||
|
@ -1,3 +1,76 @@
|
||||
\chapter{k-Nearest Neighbors}%
|
||||
\chapter{\glsfmtfull{knn}}%
|
||||
\label{cha:k-Nearest Neighbors}
|
||||
Beim \gls{knn}-Verfahren wird dem System eine Reihe von gelabelten Trainingsdaten übergeben.
|
||||
Die Klassifizierung erfolgt durch
|
||||
\begin{enumerate}
|
||||
\item Berechnung des Ähnlichkeitsmaßes (\cref{sec:Distance Metrics})\slash der Distanz zu allen bekannten Punkten
|
||||
\item Klassifizierung der Daten durch ein Mehrheitsvotum der $k$ nächsten Nachbarn
|
||||
\end{enumerate}
|
||||
|
||||
{%these brackets force the wrapfigure to behave. No clue why it doesn't do so automatically
|
||||
\section{Distance Metrics}%
|
||||
\label{sec:Distance Metrics}
|
||||
\begin{wrapfigure}{r}{.4\textwidth}
|
||||
\vspace*{-15mm}
|
||||
\centering
|
||||
\includegraphics[width = .9\linewidth]{manhattan_euclidean_1.pdf}\\
|
||||
\vspace*{-7mm}
|
||||
\caption{Euclidean and Manhattan Distance}
|
||||
\label{fig:euclidean_and_manhattan_distance}
|
||||
\vspace*{-3mm}
|
||||
\includegraphics[width = .9\linewidth]{cosine1.pdf}\\
|
||||
\vspace*{-5mm}
|
||||
\caption{Cosine Distance}
|
||||
\label{fig:cosine_distance}
|
||||
\end{wrapfigure}
|
||||
|
||||
\paragraph{Euclidean Distance}%
|
||||
\label{par:Euclidean Distance}
|
||||
\begin{equation} \label{eq:euclidean_distance}
|
||||
d(\bm x,\bm y) = ||\bm x - \bm y || = \sqrt{\sum^d_{k=1}\left(\bm{x}_k-\bm{y}_k\right)^2}
|
||||
\end{equation}
|
||||
|
||||
\paragraph{Cosine Distance}%
|
||||
\label{par:Cosine Distance}
|
||||
\begin{equation} \label{eq:cosine distance}
|
||||
d(\bm x,\bm y) = 1 - \frac{\bm x^T \bm y}{||\bm x||\cdot||\bm y||}
|
||||
\end{equation}
|
||||
|
||||
\paragraph{Hamming Distance}%
|
||||
\label{par:Hamming Distance}
|
||||
\begin{equation} \label{eq:hamming_distance}
|
||||
d(\bm x,\bm y)=\sum^d_{k=1}(\bm x_k\ne\bm y_k)
|
||||
\end{equation}
|
||||
|
||||
\paragraph{Manhattan Distance}%
|
||||
\label{par:Manhattan Distance}
|
||||
\begin{equation} \label{eq:manhattan_distance}
|
||||
d(\bm x, \bm y)=\sum^d_{k=1}|\bm x_k-\bm y_k|
|
||||
\end{equation}
|
||||
|
||||
\paragraph{Mahalanobis Distance}%
|
||||
\label{par:Mahalanobis Distance}
|
||||
berücksichtigt die Richtung, in die die Daten verteilt sind.\\
|
||||
($\nomeq{covariance}^{-1}$ ist die Inverse der Kovarianzmatrix)
|
||||
\begin{equation} \label{eq:mahalanobis_distance}
|
||||
d(\bm x,\bm y) = ||\bm x - \bm y||_{\nomeq{covariance}^{-1}}
|
||||
= \sqrt{(\bm x - \bm y)^T\nomeq{covariance}^{-1} (\bm x - \bm y)}
|
||||
\end{equation}
|
||||
}
|
||||
|
||||
\section{Curse of Dimensionality}%
|
||||
\label{sec:Curse of Dimensionality}
|
||||
Je mehr Features für jeden einzelnen Datenpunkt gegeben sind,
|
||||
desto höher ist die Dimensionalität des Feature Space.
|
||||
Allerdings ist es umso schwieriger die \gls{knn} zu finden,
|
||||
je höher die Dimensionalität ist,
|
||||
da das Volumen des Raumes exponentiell mit der Dimensionalität wächst und die Datenpunkte dadurch immer weiter auseinander liegen.
|
||||
Folglich muss es ein Ziel sein,
|
||||
die relevanten Features zu ermitteln und alle anderen Features aus der Auswertung zu entfernen.
|
||||
|
||||
\section{Finding the neighbors: KD-Trees}%
|
||||
\label{sec:Finding the neighbors: KD-Trees}
|
||||
\includegraphics[width=.9\textwidth]{KD-Trees1.pdf}\\\\
|
||||
\includegraphics[width=.9\textwidth]{KD-Trees2.pdf}
|
||||
|
||||
|
||||
|
BIN
images/KD-Trees1.pdf
Normal file
BIN
images/KD-Trees1.pdf
Normal file
Binary file not shown.
BIN
images/KD-Trees2.pdf
Normal file
BIN
images/KD-Trees2.pdf
Normal file
Binary file not shown.
471
images/cosine1.pdf
Normal file
471
images/cosine1.pdf
Normal file
File diff suppressed because one or more lines are too long
344
images/manhattan_euclidean_1.pdf
Normal file
344
images/manhattan_euclidean_1.pdf
Normal file
File diff suppressed because one or more lines are too long
Loading…
x
Reference in New Issue
Block a user